# 2022-03-31

# Forecasting: Principles and Practice

# Hyndman, R.J., & Athanasopoulos, G. (2021) Forecasting: principles and
# practice, 3rd edition, OTexts: Melbourne, Australia. OTexts.com/fpp3.
# Accessed on 2/26/22
library(fpp3)
library(fable.prophet)
## Loading required package: Rcpp

# Preface

library(fpp3)
ggplot2::theme_set(theme_minimal())

# Forecasting: Principles and Practice

library(fpp3)

# Preface

library(fpp3)

# 2.1 tsibble objects

olympic_running
## # A tsibble: 312 x 4 [4Y]
## # Key:       Length, Sex [14]
##     Year Length Sex    Time
##    <int>  <int> <chr> <dbl>
##  1  1896    100 men    12  
##  2  1900    100 men    11  
##  3  1904    100 men    11  
##  4  1908    100 men    10.8
##  5  1912    100 men    10.8
##  6  1916    100 men    NA  
##  7  1920    100 men    10.8
##  8  1924    100 men    10.6
##  9  1928    100 men    10.8
## 10  1932    100 men    10.3
## # … with 302 more rows
olympic_running %>% distinct(Sex)
## # A tibble: 2 × 1
##   Sex  
##   <chr>
## 1 men  
## 2 women
PBS
## # A tsibble: 67,596 x 9 [1M]
## # Key:       Concession, Type, ATC1, ATC2 [336]
##       Month Concession   Type      ATC1  ATC1_desc ATC2  ATC2_desc Scripts  Cost
##       <mth> <chr>        <chr>     <chr> <chr>     <chr> <chr>       <dbl> <dbl>
##  1 1991 Jul Concessional Co-payme… A     Alimenta… A01   STOMATOL…   18228 67877
##  2 1991 Aug Concessional Co-payme… A     Alimenta… A01   STOMATOL…   15327 57011
##  3 1991 Sep Concessional Co-payme… A     Alimenta… A01   STOMATOL…   14775 55020
##  4 1991 Oct Concessional Co-payme… A     Alimenta… A01   STOMATOL…   15380 57222
##  5 1991 Nov Concessional Co-payme… A     Alimenta… A01   STOMATOL…   14371 52120
##  6 1991 Dec Concessional Co-payme… A     Alimenta… A01   STOMATOL…   15028 54299
##  7 1992 Jan Concessional Co-payme… A     Alimenta… A01   STOMATOL…   11040 39753
##  8 1992 Feb Concessional Co-payme… A     Alimenta… A01   STOMATOL…   15165 54405
##  9 1992 Mar Concessional Co-payme… A     Alimenta… A01   STOMATOL…   16898 61108
## 10 1992 Apr Concessional Co-payme… A     Alimenta… A01   STOMATOL…   18141 65356
## # … with 67,586 more rows
PBS %>%
  filter(ATC2 == "A10")
## # A tsibble: 816 x 9 [1M]
## # Key:       Concession, Type, ATC1, ATC2 [4]
##       Month Concession   Type     ATC1  ATC1_desc ATC2  ATC2_desc Scripts   Cost
##       <mth> <chr>        <chr>    <chr> <chr>     <chr> <chr>       <dbl>  <dbl>
##  1 1991 Jul Concessional Co-paym… A     Alimenta… A10   ANTIDIAB…   89733 2.09e6
##  2 1991 Aug Concessional Co-paym… A     Alimenta… A10   ANTIDIAB…   77101 1.80e6
##  3 1991 Sep Concessional Co-paym… A     Alimenta… A10   ANTIDIAB…   76255 1.78e6
##  4 1991 Oct Concessional Co-paym… A     Alimenta… A10   ANTIDIAB…   78681 1.85e6
##  5 1991 Nov Concessional Co-paym… A     Alimenta… A10   ANTIDIAB…   70554 1.69e6
##  6 1991 Dec Concessional Co-paym… A     Alimenta… A10   ANTIDIAB…   75814 1.84e6
##  7 1992 Jan Concessional Co-paym… A     Alimenta… A10   ANTIDIAB…   64186 1.56e6
##  8 1992 Feb Concessional Co-paym… A     Alimenta… A10   ANTIDIAB…   75899 1.73e6
##  9 1992 Mar Concessional Co-paym… A     Alimenta… A10   ANTIDIAB…   89445 2.05e6
## 10 1992 Apr Concessional Co-paym… A     Alimenta… A10   ANTIDIAB…   97315 2.23e6
## # … with 806 more rows
PBS %>%
  filter(ATC2 == "A10") %>%
  select(Month, Concession, Type, Cost)
## # A tsibble: 816 x 4 [1M]
## # Key:       Concession, Type [4]
##       Month Concession   Type           Cost
##       <mth> <chr>        <chr>         <dbl>
##  1 1991 Jul Concessional Co-payments 2092878
##  2 1991 Aug Concessional Co-payments 1795733
##  3 1991 Sep Concessional Co-payments 1777231
##  4 1991 Oct Concessional Co-payments 1848507
##  5 1991 Nov Concessional Co-payments 1686458
##  6 1991 Dec Concessional Co-payments 1843079
##  7 1992 Jan Concessional Co-payments 1564702
##  8 1992 Feb Concessional Co-payments 1732508
##  9 1992 Mar Concessional Co-payments 2046102
## 10 1992 Apr Concessional Co-payments 2225977
## # … with 806 more rows
PBS %>%
  filter(ATC2 == "A10") %>%
  select(Month, Concession, Type, Cost) %>%
  summarise(TotalC = sum(Cost))
## # A tsibble: 204 x 2 [1M]
##       Month  TotalC
##       <mth>   <dbl>
##  1 1991 Jul 3526591
##  2 1991 Aug 3180891
##  3 1991 Sep 3252221
##  4 1991 Oct 3611003
##  5 1991 Nov 3565869
##  6 1991 Dec 4306371
##  7 1992 Jan 5088335
##  8 1992 Feb 2814520
##  9 1992 Mar 2985811
## 10 1992 Apr 3204780
## # … with 194 more rows
PBS %>%
  filter(ATC2 == "A10") %>%
  select(Month, Concession, Type, Cost) %>%
  summarise(TotalC = sum(Cost)) %>%
  mutate(Cost = TotalC/1e6)
## # A tsibble: 204 x 3 [1M]
##       Month  TotalC  Cost
##       <mth>   <dbl> <dbl>
##  1 1991 Jul 3526591  3.53
##  2 1991 Aug 3180891  3.18
##  3 1991 Sep 3252221  3.25
##  4 1991 Oct 3611003  3.61
##  5 1991 Nov 3565869  3.57
##  6 1991 Dec 4306371  4.31
##  7 1992 Jan 5088335  5.09
##  8 1992 Feb 2814520  2.81
##  9 1992 Mar 2985811  2.99
## 10 1992 Apr 3204780  3.20
## # … with 194 more rows
# Monthly total cost of antidiabetic (ATC2 == "A10") scripts, with
# Cost rescaled to $millions for plotting.
# Fixed: the original used right assignment (`-> a10`), which the
# tidyverse style guide forbids; use `<-` at the head of the pipeline.
a10 <- PBS %>%
  filter(ATC2 == "A10") %>%
  select(Month, Concession, Type, Cost) %>%
  summarise(TotalC = sum(Cost)) %>%
  mutate(Cost = TotalC / 1e6)
prison <- readr::read_csv("https://OTexts.com/fpp3/extrafiles/prison_population.csv")
## Rows: 3072 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): State, Gender, Legal, Indigenous
## dbl  (1): Count
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Convert the raw prison data to a quarterly tsibble: derive a Quarter
# index from the Date column, drop Date, and declare the four
# identifying columns as the key (64 series, per the output below).
prison <- prison %>%
  mutate(Quarter = yearquarter(Date)) %>%
  select(-Date) %>%
  as_tsibble(key = c(State, Gender, Legal, Indigenous),
             index = Quarter)

prison
## # A tsibble: 3,072 x 6 [1Q]
## # Key:       State, Gender, Legal, Indigenous [64]
##    State Gender Legal    Indigenous Count Quarter
##    <chr> <chr>  <chr>    <chr>      <dbl>   <qtr>
##  1 ACT   Female Remanded ATSI           0 2005 Q1
##  2 ACT   Female Remanded ATSI           1 2005 Q2
##  3 ACT   Female Remanded ATSI           0 2005 Q3
##  4 ACT   Female Remanded ATSI           0 2005 Q4
##  5 ACT   Female Remanded ATSI           1 2006 Q1
##  6 ACT   Female Remanded ATSI           1 2006 Q2
##  7 ACT   Female Remanded ATSI           1 2006 Q3
##  8 ACT   Female Remanded ATSI           0 2006 Q4
##  9 ACT   Female Remanded ATSI           0 2007 Q1
## 10 ACT   Female Remanded ATSI           1 2007 Q2
## # … with 3,062 more rows

# 2.2 Time plots

# Melbourne-Sydney economy-class passenger numbers, rescaled to
# thousands, then plotted as a time plot.
melsyd_economy <- ansett %>%
  filter(Airports == "MEL-SYD", Class == "Economy") %>%
  mutate(Passengers = Passengers / 1000)
melsyd_economy %>%
  autoplot(Passengers) +
  labs(
    title = "Ansett airlines economy class",
    subtitle = "Melbourne-Sydney",
    y = "Passengers ('000)"
  )

autoplot(a10, Cost) +
  labs(y = "$ (millions)",
       title = "Australian antidiabetic drug sales")

# 2.4 Seasonal plots

a10 %>%
  gg_season(Cost, labels = "both") +
  labs(y = "$ (millions)",
       title = "Seasonal plot: Antidiabetic drug sales")

# Seasonal plots of half-hourly electricity demand at three different
# seasonal periods: daily, weekly and yearly. Legends are suppressed
# for the first two, where one line per day/week would swamp the plot.
vic_elec %>%
  gg_season(Demand, period = "day") +
  theme(legend.position = "none") +
  labs(y = "MWh", title = "Electricity demand: Victoria")

vic_elec %>%
  gg_season(Demand, period = "week") +
  theme(legend.position = "none") +
  labs(y = "MWh", title = "Electricity demand: Victoria")

vic_elec %>%
  gg_season(Demand, period = "year") +
  labs(y = "MWh", title = "Electricity demand: Victoria")

# 2.5 Seasonal subseries plots

a10 %>%
  gg_subseries(Cost) +
  labs(
    y = "$ (millions)",
    title = "Australian antidiabetic drug sales"
  )

# Total quarterly holiday trips by state: filtering to holiday travel,
# then summing Trips within each State (the tsibble index Quarter is
# retained automatically, giving one series per state).
holidays <- tourism %>%
  filter(Purpose == "Holiday") %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips))
holidays
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
##    State Quarter Trips
##    <chr>   <qtr> <dbl>
##  1 ACT   1998 Q1  196.
##  2 ACT   1998 Q2  127.
##  3 ACT   1998 Q3  111.
##  4 ACT   1998 Q4  170.
##  5 ACT   1999 Q1  108.
##  6 ACT   1999 Q2  125.
##  7 ACT   1999 Q3  178.
##  8 ACT   1999 Q4  218.
##  9 ACT   2000 Q1  158.
## 10 ACT   2000 Q2  155.
## # … with 630 more rows
autoplot(holidays, Trips) +
  labs(y = "Overnight trips ('000)",
       title = "Australian domestic holidays")

gg_season(holidays, Trips) +
  labs(y = "Overnight trips ('000)",
       title = "Australian domestic holidays")

holidays %>%
  gg_subseries(Trips) +
  labs(y = "Overnight trips ('000)",
       title = "Australian domestic holidays")

# 2.6 Scatterplots

vic_elec %>%
  filter(year(Time) == 2014) %>%
  autoplot(Demand) +
  labs(y = "GW",
       title = "Half-hourly electricity demand: Victoria")

vic_elec %>%
  filter(year(Time) == 2014) %>%
  autoplot(Temperature) +
  labs(
    y = "Degrees Celsius",
    title = "Half-hourly temperatures: Melbourne, Australia"
  )

vic_elec %>%
  filter(year(Time) == 2014) %>%
  ggplot(aes(x = Temperature, y = Demand)) +
  geom_point() +
  labs(x = "Temperature (degrees Celsius)",
       y = "Electricity demand (GW)")

visitors <- tourism %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips))
visitors %>%
  ggplot(aes(x = Quarter, y = Trips)) +
  geom_line() +
  facet_grid(vars(State), scales = "free_y") +
  labs(title = "Australian domestic tourism",
       y= "Overnight trips ('000)")

visitors %>%
  pivot_wider(values_from=Trips, names_from=State) %>%
  GGally::ggpairs(columns = 2:9)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

# 2.7 Lag plots

# Quarterly production from 2000 onwards, then lag plots of Beer
# against its lagged values (quarterly data, so the lag-4 panel shows
# the seasonal relationship).
recent_production <- aus_production %>%
  filter(year(Quarter) >= 2000)
recent_production %>%
  gg_lag(Beer, geom = "point") +
  labs(x = "lag(Beer, k)")

# 2.8 Autocorrelation

recent_production %>% ACF(Beer, lag_max = 9)
## # A tsibble: 9 x 2 [1Q]
##     lag      acf
##   <lag>    <dbl>
## 1    1Q -0.0530 
## 2    2Q -0.758  
## 3    3Q -0.0262 
## 4    4Q  0.802  
## 5    5Q -0.0775 
## 6    6Q -0.657  
## 7    7Q  0.00119
## 8    8Q  0.707  
## 9    9Q -0.0888
recent_production %>%
  ACF(Beer) %>%
  autoplot() + labs(title="Australian beer production")

a10 %>%
  ACF(Cost, lag_max = 48) %>%
  autoplot() +
  labs(title="Australian antidiabetic drug sales")

# 2.9 White noise

set.seed(30)
y <- tsibble(sample = 1:50, wn = rnorm(50), index = sample)
y %>% autoplot(wn) + labs(title = "White noise", y = "")

y %>%
  ACF(wn) %>%
  autoplot() + labs(title = "White noise")

# 2.10 Exercises

# Reproducible random choice of one retail series for the exercises.
set.seed(12345678)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
# Re-index Google stock by trading day (the Date index has gaps on
# non-trading days), then compute day-to-day price changes.
dgoog <- gafa_stock %>%
  filter(Symbol == "GOOG", year(Date) >= 2018) %>%
  mutate(trading_day = row_number()) %>%
  update_tsibble(index = trading_day, regular = TRUE) %>%
  mutate(diff = difference(Close))

# 3.1 Transformations and adjustments

global_economy %>%
  filter(Country == "Australia") %>%
  autoplot(GDP/Population) +
  labs(title= "GDP per capita", y = "$US")

print_retail <- aus_retail %>%
  filter(Industry == "Newspaper and book retailing") %>%
  group_by(Industry) %>%
  index_by(Year = year(Month)) %>%
  summarise(Turnover = sum(Turnover))
aus_economy <- global_economy %>%
  filter(Code == "AUS")
print_retail %>%
  left_join(aus_economy, by = "Year") %>%
  mutate(Adjusted_turnover = Turnover / CPI * 100) %>%
  pivot_longer(c(Turnover, Adjusted_turnover),
               values_to = "Turnover") %>%
  mutate(name = factor(name,
         levels=c("Turnover","Adjusted_turnover"))) %>%
  ggplot(aes(x = Year, y = Turnover)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y") +
  labs(title = "Turnover: Australian print media industry",
       y = "$AU")
## Warning: Removed 1 row(s) containing missing values (geom_path).

# Select a Box-Cox parameter for Gas production via the Guerrero
# feature, then plot the transformed series with lambda in the title
# (rendered with latex2exp).
lambda <- aus_production %>%
  features(Gas, features = guerrero) %>%
  pull(lambda_guerrero)
aus_production %>%
  autoplot(box_cox(Gas, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed gas production with $\\lambda$ = ",
         round(lambda,2))))

# 3.2 Time series components

# US retail employment from 1990 onwards; Series_ID is dropped as it
# is constant for the selected series.
us_retail_employment <- us_employment %>%
  filter(year(Month) >= 1990, Title == "Retail Trade") %>%
  select(-Series_ID)
autoplot(us_retail_employment, Employed) +
  labs(y = "Persons (thousands)",
       title = "Total employment in US retail")

# STL decomposition of Employed with default settings.
dcmp <- us_retail_employment %>%
  model(stl = STL(Employed))
components(dcmp)
## # A dable: 357 x 7 [1M]
## # Key:     .model [1]
## # :        Employed = trend + season_year + remainder
##    .model    Month Employed  trend season_year remainder season_adjust
##    <chr>     <mth>    <dbl>  <dbl>       <dbl>     <dbl>         <dbl>
##  1 stl    1990 Jan   13256. 13288.      -33.0      0.836        13289.
##  2 stl    1990 Feb   12966. 13269.     -258.     -44.6          13224.
##  3 stl    1990 Mar   12938. 13250.     -290.     -22.1          13228.
##  4 stl    1990 Apr   13012. 13231.     -220.       1.05         13232.
##  5 stl    1990 May   13108. 13211.     -114.      11.3          13223.
##  6 stl    1990 Jun   13183. 13192.      -24.3     15.5          13207.
##  7 stl    1990 Jul   13170. 13172.      -23.2     21.6          13193.
##  8 stl    1990 Aug   13160. 13151.       -9.52    17.8          13169.
##  9 stl    1990 Sep   13113. 13131.      -39.5     22.0          13153.
## 10 stl    1990 Oct   13185. 13110.       61.6     13.2          13124.
## # … with 347 more rows
components(dcmp) %>%
  as_tsibble() %>%
  autoplot(Employed, colour="gray") +
  geom_line(aes(y=trend), colour = "#D55E00") +
  labs(
    y = "Persons (thousands)",
    title = "Total employment in US retail"
  )

components(dcmp) %>% autoplot()

components(dcmp) %>%
  as_tsibble() %>%
  autoplot(Employed, colour = "gray") +
  geom_line(aes(y=season_adjust), colour = "#0072B2") +
  labs(y = "Persons (thousands)",
       title = "Total employment in US retail")

# 3.3 Moving averages

global_economy %>%
  filter(Country == "Australia") %>%
  autoplot(Exports) +
  labs(y = "% of GDP", title = "Total Australian exports")

aus_exports <- global_economy %>%
  filter(Country == "Australia") %>%
  mutate(
    `5-MA` = slider::slide_dbl(Exports, mean,
                .before = 2, .after = 2, .complete = TRUE)
  )
aus_exports %>%
  autoplot(Exports) +
  geom_line(aes(y = `5-MA`), colour = "#D55E00") +
  labs(y = "% of GDP",
       title = "Total Australian exports") +
  guides(colour = guide_legend(title = "series"))
## Warning: Removed 4 row(s) containing missing values (geom_path).

# Quarterly beer production since 1992.
beer <- aus_production %>%
  filter(year(Quarter) >= 1992) %>%
  select(Quarter, Beer)
# 2x4-MA: a 4-term moving average (.before = 1, .after = 2) followed
# by a 2-term MA, which centres the even-order average on each quarter.
beer_ma <- beer %>%
  mutate(
    `4-MA` = slider::slide_dbl(Beer, mean,
                .before = 1, .after = 2, .complete = TRUE),
    `2x4-MA` = slider::slide_dbl(`4-MA`, mean,
                .before = 1, .after = 0, .complete = TRUE)
  )
# 2x12-MA: centred 12-month moving average, estimating the
# trend-cycle of the monthly employment series.
us_retail_employment_ma <- us_retail_employment %>%
  mutate(
    `12-MA` = slider::slide_dbl(Employed, mean,
                .before = 5, .after = 6, .complete = TRUE),
    `2x12-MA` = slider::slide_dbl(`12-MA`, mean,
                .before = 1, .after = 0, .complete = TRUE)
  )
us_retail_employment_ma %>%
  autoplot(Employed, colour = "gray") +
  geom_line(aes(y = `2x12-MA`), colour = "#D55E00") +
  labs(y = "Persons (thousands)",
       title = "Total employment in US retail")
## Warning: Removed 12 row(s) containing missing values (geom_path).

# 3.4 Classical decomposition

us_retail_employment %>%
  model(
    classical_decomposition(Employed, type = "additive")
  ) %>%
  components() %>%
  autoplot() +
  labs(title = "Classical additive decomposition of total
                  US retail employment")
## Warning: Removed 6 row(s) containing missing values (geom_path).

# 3.5 Methods used by official statistics agencies

x11_dcmp <- us_retail_employment %>%
  model(x11 = X_13ARIMA_SEATS(Employed ~ x11())) %>%
  components()
autoplot(x11_dcmp) +
  labs(title =
    "Decomposition of total US retail employment using X-11.")

x11_dcmp %>%
  ggplot(aes(x = Month)) +
  geom_line(aes(y = Employed, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "Persons (thousands)",
       title = "Total employment in US retail") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

x11_dcmp %>%
  gg_subseries(seasonal)

seats_dcmp <- us_retail_employment %>%
  model(seats = X_13ARIMA_SEATS(Employed ~ seats())) %>%
  components()
autoplot(seats_dcmp) +
  labs(title =
    "Decomposition of total US retail employment using SEATS")

# 3.6 STL decomposition

# STL with explicit options: a short trend window (7), a fixed
# seasonal pattern (season(window = "periodic")), and robust fitting
# to reduce the influence of outliers.
us_retail_employment %>%
  model(
    STL(Employed ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) %>%
  components() %>%
  autoplot()

# 3.7 Exercises

gas <- tail(aus_production, 5*4) %>% select(Gas)

# 4.1 Some simple statistics

tourism %>%
  features(Trips, list(mean = mean)) %>%
  arrange(mean)
## # A tibble: 304 × 4
##    Region          State              Purpose   mean
##    <chr>           <chr>              <chr>    <dbl>
##  1 Kangaroo Island South Australia    Other    0.340
##  2 MacDonnell      Northern Territory Other    0.449
##  3 Wilderness West Tasmania           Other    0.478
##  4 Barkly          Northern Territory Other    0.632
##  5 Clare Valley    South Australia    Other    0.898
##  6 Barossa         South Australia    Other    1.02 
##  7 Kakadu Arnhem   Northern Territory Other    1.04 
##  8 Lasseter        Northern Territory Other    1.14 
##  9 Wimmera         Victoria           Other    1.15 
## 10 MacDonnell      Northern Territory Visiting 1.18 
## # … with 294 more rows
tourism %>% features(Trips, quantile)
## # A tibble: 304 × 8
##    Region         State             Purpose    `0%`  `25%`   `50%`  `75%` `100%`
##    <chr>          <chr>             <chr>     <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
##  1 Adelaide       South Australia   Busine…  68.7   134.   153.    177.   242.  
##  2 Adelaide       South Australia   Holiday 108.    135.   154.    172.   224.  
##  3 Adelaide       South Australia   Other    25.9    43.9   53.8    62.5  107.  
##  4 Adelaide       South Australia   Visiti… 137.    179.   206.    229.   270.  
##  5 Adelaide Hills South Australia   Busine…   0       0      1.26    3.92  28.6 
##  6 Adelaide Hills South Australia   Holiday   0       5.77   8.52   14.1   35.8 
##  7 Adelaide Hills South Australia   Other     0       0      0.908   2.09   8.95
##  8 Adelaide Hills South Australia   Visiti…   0.778   8.91  12.2    16.8   81.1 
##  9 Alice Springs  Northern Territo… Busine…   1.01    9.13  13.3    18.5   34.1 
## 10 Alice Springs  Northern Territo… Holiday   2.81   16.9   31.5    44.8   76.5 
## # … with 294 more rows

# 4.2 ACF features

tourism %>% features(Trips, feat_acf)
## # A tibble: 304 × 10
##    Region         State Purpose     acf1 acf10 diff1_acf1 diff1_acf10 diff2_acf1
##    <chr>          <chr> <chr>      <dbl> <dbl>      <dbl>       <dbl>      <dbl>
##  1 Adelaide       Sout… Busine…  0.0333  0.131     -0.520       0.463     -0.676
##  2 Adelaide       Sout… Holiday  0.0456  0.372     -0.343       0.614     -0.487
##  3 Adelaide       Sout… Other    0.517   1.15      -0.409       0.383     -0.675
##  4 Adelaide       Sout… Visiti…  0.0684  0.294     -0.394       0.452     -0.518
##  5 Adelaide Hills Sout… Busine…  0.0709  0.134     -0.580       0.415     -0.750
##  6 Adelaide Hills Sout… Holiday  0.131   0.313     -0.536       0.500     -0.716
##  7 Adelaide Hills Sout… Other    0.261   0.330     -0.253       0.317     -0.457
##  8 Adelaide Hills Sout… Visiti…  0.139   0.117     -0.472       0.239     -0.626
##  9 Alice Springs  Nort… Busine…  0.217   0.367     -0.500       0.381     -0.658
## 10 Alice Springs  Nort… Holiday -0.00660 2.11      -0.153       2.11      -0.274
## # … with 294 more rows, and 2 more variables: diff2_acf10 <dbl>,
## #   season_acf1 <dbl>

# 4.3 STL features

tourism %>%
  features(Trips, feat_stl)
## # A tibble: 304 × 12
##    Region         State Purpose trend_strength seasonal_streng… seasonal_peak_y…
##    <chr>          <chr> <chr>            <dbl>            <dbl>            <dbl>
##  1 Adelaide       Sout… Busine…          0.464            0.407                3
##  2 Adelaide       Sout… Holiday          0.554            0.619                1
##  3 Adelaide       Sout… Other            0.746            0.202                2
##  4 Adelaide       Sout… Visiti…          0.435            0.452                1
##  5 Adelaide Hills Sout… Busine…          0.464            0.179                3
##  6 Adelaide Hills Sout… Holiday          0.528            0.296                2
##  7 Adelaide Hills Sout… Other            0.593            0.404                2
##  8 Adelaide Hills Sout… Visiti…          0.488            0.254                0
##  9 Alice Springs  Nort… Busine…          0.534            0.251                0
## 10 Alice Springs  Nort… Holiday          0.381            0.832                3
## # … with 294 more rows, and 6 more variables: seasonal_trough_year <dbl>,
## #   spikiness <dbl>, linearity <dbl>, curvature <dbl>, stl_e_acf1 <dbl>,
## #   stl_e_acf10 <dbl>
tourism %>%
  features(Trips, feat_stl) %>%
  ggplot(aes(x = trend_strength, y = seasonal_strength_year,
             col = Purpose)) +
  geom_point() +
  facet_wrap(vars(State))

tourism %>%
  features(Trips, feat_stl) %>%
  filter(
    seasonal_strength_year == max(seasonal_strength_year)
  ) %>%
  left_join(tourism, by = c("State", "Region", "Purpose")) %>%
  ggplot(aes(x = Quarter, y = Trips)) +
  geom_line() +
  facet_grid(vars(State, Region, Purpose))

# 4.5 Exploring Australian tourism data

tourism_features <- tourism %>%
  features(Trips, feature_set(pkgs = "feasts"))
## Warning: `n_flat_spots()` was deprecated in feasts 0.1.5.
## Please use `longest_flat_spot()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
tourism_features
## # A tibble: 304 × 51
##    Region         State Purpose trend_strength seasonal_streng… seasonal_peak_y…
##    <chr>          <chr> <chr>            <dbl>            <dbl>            <dbl>
##  1 Adelaide       Sout… Busine…          0.464            0.407                3
##  2 Adelaide       Sout… Holiday          0.554            0.619                1
##  3 Adelaide       Sout… Other            0.746            0.202                2
##  4 Adelaide       Sout… Visiti…          0.435            0.452                1
##  5 Adelaide Hills Sout… Busine…          0.464            0.179                3
##  6 Adelaide Hills Sout… Holiday          0.528            0.296                2
##  7 Adelaide Hills Sout… Other            0.593            0.404                2
##  8 Adelaide Hills Sout… Visiti…          0.488            0.254                0
##  9 Alice Springs  Nort… Busine…          0.534            0.251                0
## 10 Alice Springs  Nort… Holiday          0.381            0.832                3
## # … with 294 more rows, and 45 more variables: seasonal_trough_year <dbl>,
## #   spikiness <dbl>, linearity <dbl>, curvature <dbl>, stl_e_acf1 <dbl>,
## #   stl_e_acf10 <dbl>, acf1 <dbl>, acf10 <dbl>, diff1_acf1 <dbl>,
## #   diff1_acf10 <dbl>, diff2_acf1 <dbl>, diff2_acf10 <dbl>, season_acf1 <dbl>,
## #   pacf5 <dbl>, diff1_pacf5 <dbl>, diff2_pacf5 <dbl>, season_pacf <dbl>,
## #   zero_run_mean <dbl>, nonzero_squared_cv <dbl>, zero_start_prop <dbl>,
## #   zero_end_prop <dbl>, lambda_guerrero <dbl>, kpss_stat <dbl>, …
library(glue)
# Pairwise plots of all seasonal features, coloured by trip purpose.
# Fixed: `select_at()`/`vars()` are superseded — plain `select()` with
# tidyselect helpers is the current idiom; also removed a stray
# trailing comma inside `mutate()`.
tourism_features %>%
  select(contains("season"), Purpose) %>%
  mutate(
    # The peak/trough quarter features use 0 where the original coding
    # wraps around, so recode 0 as 4 before building "Q1".."Q4" labels.
    seasonal_peak_year = seasonal_peak_year +
      4*(seasonal_peak_year==0),
    seasonal_trough_year = seasonal_trough_year +
      4*(seasonal_trough_year==0),
    seasonal_peak_year = glue("Q{seasonal_peak_year}"),
    seasonal_trough_year = glue("Q{seasonal_trough_year}")
  ) %>%
  GGally::ggpairs(mapping = aes(colour = Purpose))
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

library(broom)
# Principal components on the numeric features (scaled), with the
# scores (.fittedPC1, .fittedPC2, ...) bound back onto the feature
# table by broom::augment(), then a scatter of the first two PCs.
pcs <- tourism_features %>%
  select(-State, -Region, -Purpose) %>%
  prcomp(scale = TRUE) %>%
  augment(tourism_features)
pcs %>%
  ggplot(aes(x = .fittedPC1, y = .fittedPC2, col = Purpose)) +
  geom_point() +
  theme(aspect.ratio = 1)

outliers <- pcs %>%
  filter(.fittedPC1 > 10) %>%
  select(Region, State, Purpose, .fittedPC1, .fittedPC2)
outliers
## # A tibble: 4 × 5
##   Region                 State             Purpose  .fittedPC1 .fittedPC2
##   <chr>                  <chr>             <chr>         <dbl>      <dbl>
## 1 Australia's North West Western Australia Business       13.4    -11.3  
## 2 Australia's South West Western Australia Holiday        10.9      0.880
## 3 Melbourne              Victoria          Holiday        12.3    -10.4  
## 4 South Coast            New South Wales   Holiday        11.9      9.42
outliers %>%
  left_join(tourism, by = c("State", "Region", "Purpose")) %>%
  mutate(
    Series = glue("{State}", "{Region}", "{Purpose}",
                  .sep = "\n\n")
  ) %>%
  ggplot(aes(x = Quarter, y = Trips)) +
  geom_line() +
  facet_grid(Series ~ ., scales = "free") +
  labs(title = "Outlying time series in PC space")

# 5.1 A tidy forecasting workflow

gdppc <- global_economy %>%
  mutate(GDP_per_capita = GDP / Population)
gdppc %>%
  filter(Country == "Sweden") %>%
  autoplot(GDP_per_capita) +
  labs(y = "$US", title = "GDP per capita for Sweden")

# TSLM(GDP_per_capita ~ trend())
# Fit a linear trend model to every country's GDP per capita.
# (The 7 errors reported below come from countries with no non-NA
# GDP_per_capita observations.)
fit <- gdppc %>%
  model(trend_model = TSLM(GDP_per_capita ~ trend()))
## Warning: 7 errors (1 unique) encountered for trend_model
## [7] 0 (non-NA) cases
fit
## # A mable: 263 x 2
## # Key:     Country [263]
##    Country             trend_model
##    <fct>                   <model>
##  1 Afghanistan              <TSLM>
##  2 Albania                  <TSLM>
##  3 Algeria                  <TSLM>
##  4 American Samoa           <TSLM>
##  5 Andorra                  <TSLM>
##  6 Angola                   <TSLM>
##  7 Antigua and Barbuda      <TSLM>
##  8 Arab World               <TSLM>
##  9 Argentina                <TSLM>
## 10 Armenia                  <TSLM>
## # … with 253 more rows
fit %>% forecast(h = "3 years")
## # A fable: 789 x 5 [1Y]
## # Key:     Country, .model [263]
##    Country        .model       Year   GDP_per_capita  .mean
##    <fct>          <chr>       <dbl>           <dist>  <dbl>
##  1 Afghanistan    trend_model  2018     N(526, 9653)   526.
##  2 Afghanistan    trend_model  2019     N(534, 9689)   534.
##  3 Afghanistan    trend_model  2020     N(542, 9727)   542.
##  4 Albania        trend_model  2018  N(4716, 476419)  4716.
##  5 Albania        trend_model  2019  N(4867, 481086)  4867.
##  6 Albania        trend_model  2020  N(5018, 486012)  5018.
##  7 Algeria        trend_model  2018  N(4410, 643094)  4410.
##  8 Algeria        trend_model  2019  N(4489, 645311)  4489.
##  9 Algeria        trend_model  2020  N(4568, 647602)  4568.
## 10 American Samoa trend_model  2018 N(12491, 652926) 12491.
## # … with 779 more rows
fit %>%
  forecast(h = "3 years") %>%
  filter(Country == "Sweden") %>%
  autoplot(gdppc) +
  labs(y = "$US", title = "GDP per capita for Sweden")

# 5.2 Some simple forecasting methods

bricks <- aus_production %>%
  filter_index("1970 Q1" ~ "2004 Q4") %>%
  select(Bricks)
bricks %>% model(MEAN(Bricks))
## # A mable: 1 x 1
##   `MEAN(Bricks)`
##          <model>
## 1         <MEAN>
bricks %>% model(NAIVE(Bricks))
## # A mable: 1 x 1
##   `NAIVE(Bricks)`
##           <model>
## 1         <NAIVE>
bricks %>% model(SNAIVE(Bricks ~ lag("year")))
## # A mable: 1 x 1
##   `SNAIVE(Bricks ~ lag("year"))`
##                          <model>
## 1                       <SNAIVE>
bricks %>% model(RW(Bricks ~ drift()))
## # A mable: 1 x 1
##   `RW(Bricks ~ drift())`
##                  <model>
## 1          <RW w/ drift>
# Set training data from 1992 to 2006
train <- aus_production %>%
  filter_index("1992 Q1" ~ "2006 Q4")
# Fit the models
beer_fit <- train %>%
  model(
    Mean = MEAN(Beer),
    `Naïve` = NAIVE(Beer),
    `Seasonal naïve` = SNAIVE(Beer)
  )
# Generate forecasts for 14 quarters
beer_fc <- beer_fit %>% forecast(h = 14)
# Plot forecasts against actual values
beer_fc %>%
  autoplot(train, level = NULL) +
  autolayer(
    filter_index(aus_production, "2007 Q1" ~ .),
    colour = "black"
  ) +
  labs(
    y = "Megalitres",
    title = "Forecasts for quarterly beer production"
  ) +
  guides(colour = guide_legend(title = "Forecast"))
## Plot variable not specified, automatically selected `.vars = Beer`

# Re-index based on trading days
google_stock <- gafa_stock %>%
  filter(Symbol == "GOOG", year(Date) >= 2015) %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE)
# Filter the year of interest
google_2015 <- google_stock %>% filter(year(Date) == 2015)
# Fit the models
google_fit <- google_2015 %>%
  model(
    Mean = MEAN(Close),
    `Naïve` = NAIVE(Close),
    Drift = NAIVE(Close ~ drift())
  )
# Produce forecasts for the trading days in January 2016
google_jan_2016 <- google_stock %>%
  filter(yearmonth(Date) == yearmonth("2016 Jan"))
google_fc <- google_fit %>%
  forecast(new_data = google_jan_2016)
# Plot the forecasts
google_fc %>%
  autoplot(google_2015, level = NULL) +
  autolayer(google_jan_2016, Close, colour = "black") +
  labs(y = "$US",
       title = "Google daily closing stock prices",
       subtitle = "(Jan 2015 - Jan 2016)") +
  guides(colour = guide_legend(title = "Forecast"))

# 5.3 Fitted values and residuals

augment(beer_fit)
## # A tsibble: 180 x 6 [1Q]
## # Key:       .model [3]
##    .model Quarter  Beer .fitted .resid .innov
##    <chr>    <qtr> <dbl>   <dbl>  <dbl>  <dbl>
##  1 Mean   1992 Q1   443    436.   6.55   6.55
##  2 Mean   1992 Q2   410    436. -26.4  -26.4 
##  3 Mean   1992 Q3   420    436. -16.4  -16.4 
##  4 Mean   1992 Q4   532    436.  95.6   95.6 
##  5 Mean   1993 Q1   433    436.  -3.45  -3.45
##  6 Mean   1993 Q2   421    436. -15.4  -15.4 
##  7 Mean   1993 Q3   410    436. -26.4  -26.4 
##  8 Mean   1993 Q4   512    436.  75.6   75.6 
##  9 Mean   1994 Q1   449    436.  12.6   12.6 
## 10 Mean   1994 Q2   381    436. -55.4  -55.4 
## # … with 170 more rows

# 5.4 Residual diagnostics

autoplot(google_2015, Close) +
  labs(y = "$US",
       title = "Google daily closing stock prices in 2015")

# Naive-method innovation residuals; the first residual is NA (no prior
# observation to forecast from), hence the "Removed 1 row(s)" warnings below.
aug <- google_2015 %>%
  model(NAIVE(Close)) %>%
  augment()
autoplot(aug, .innov) +
  labs(y = "$US",
       title = "Residuals from the naïve method")
## Warning: Removed 1 row(s) containing missing values (geom_path).

aug %>%
  ggplot(aes(x = .innov)) +
  geom_histogram() +
  labs(title = "Histogram of residuals")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).

aug %>%
  ACF(.innov) %>%
  autoplot() +
  labs(title = "Residuals from the naïve method")

# gg_tsresiduals() combines the time plot, ACF and histogram in one figure.
google_2015 %>%
  model(NAIVE(Close)) %>%
  gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

# Portmanteau tests for residual autocorrelation; dof = 0 because the
# naive method estimates no parameters.
aug %>% features(.innov, box_pierce, lag = 10, dof = 0)
## # A tibble: 1 × 4
##   Symbol .model       bp_stat bp_pvalue
##   <chr>  <chr>          <dbl>     <dbl>
## 1 GOOG   NAIVE(Close)    7.74     0.654
aug %>% features(.innov, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 × 4
##   Symbol .model       lb_stat lb_pvalue
##   <chr>  <chr>          <dbl>     <dbl>
## 1 GOOG   NAIVE(Close)    7.91     0.637
# The drift model estimates one parameter (the drift b), so dof = 1 here.
fit <- google_2015 %>% model(RW(Close ~ drift()))
tidy(fit)
## # A tibble: 1 × 7
##   Symbol .model              term  estimate std.error statistic p.value
##   <chr>  <chr>               <chr>    <dbl>     <dbl>     <dbl>   <dbl>
## 1 GOOG   RW(Close ~ drift()) b        0.944     0.705      1.34   0.182
augment(fit) %>% features(.innov, ljung_box, lag=10, dof=1)
## # A tibble: 1 × 4
##   Symbol .model              lb_stat lb_pvalue
##   <chr>  <chr>                 <dbl>     <dbl>
## 1 GOOG   RW(Close ~ drift())    7.91     0.543

# 5.5 Distributional forecasts and prediction intervals

# hilo() converts the forecast distributions into 80% and 95%
# prediction-interval columns.
google_2015 %>%
  model(NAIVE(Close)) %>%
  forecast(h = 10) %>%
  hilo()
## # A tsibble: 10 x 7 [1]
## # Key:       Symbol, .model [1]
##    Symbol .model         day        Close .mean                  `80%`
##    <chr>  <chr>        <dbl>       <dist> <dbl>                 <hilo>
##  1 GOOG   NAIVE(Close)   253  N(759, 125)  759. [744.5400, 773.2200]80
##  2 GOOG   NAIVE(Close)   254  N(759, 250)  759. [738.6001, 779.1599]80
##  3 GOOG   NAIVE(Close)   255  N(759, 376)  759. [734.0423, 783.7177]80
##  4 GOOG   NAIVE(Close)   256  N(759, 501)  759. [730.1999, 787.5601]80
##  5 GOOG   NAIVE(Close)   257  N(759, 626)  759. [726.8147, 790.9453]80
##  6 GOOG   NAIVE(Close)   258  N(759, 751)  759. [723.7543, 794.0058]80
##  7 GOOG   NAIVE(Close)   259  N(759, 876)  759. [720.9399, 796.8202]80
##  8 GOOG   NAIVE(Close)   260 N(759, 1002)  759. [718.3203, 799.4397]80
##  9 GOOG   NAIVE(Close)   261 N(759, 1127)  759. [715.8599, 801.9001]80
## 10 GOOG   NAIVE(Close)   262 N(759, 1252)  759. [713.5329, 804.2272]80
## # … with 1 more variable: `95%` <hilo>
google_2015 %>%
  model(NAIVE(Close)) %>%
  forecast(h = 10) %>%
  autoplot(google_2015) +
  labs(title="Google daily closing stock price", y="$US" )

# generate() simulates 5 future sample paths of length 30; bootstrap = TRUE
# resamples historical residuals instead of assuming normality.
fit <- google_2015 %>%
  model(NAIVE(Close))
sim <- fit %>% generate(h = 30, times = 5, bootstrap = TRUE)
sim
## # A tsibble: 150 x 5 [1]
## # Key:       Symbol, .model, .rep [5]
##    Symbol .model         day .rep   .sim
##    <chr>  <chr>        <dbl> <chr> <dbl>
##  1 GOOG   NAIVE(Close)   253 1      759.
##  2 GOOG   NAIVE(Close)   254 1      753.
##  3 GOOG   NAIVE(Close)   255 1      749.
##  4 GOOG   NAIVE(Close)   256 1      749.
##  5 GOOG   NAIVE(Close)   257 1      748.
##  6 GOOG   NAIVE(Close)   258 1      753.
##  7 GOOG   NAIVE(Close)   259 1      748.
##  8 GOOG   NAIVE(Close)   260 1      761.
##  9 GOOG   NAIVE(Close)   261 1      751.
## 10 GOOG   NAIVE(Close)   262 1      750.
## # … with 140 more rows
google_2015 %>%
  ggplot(aes(x = day)) +
  geom_line(aes(y = Close)) +
  geom_line(aes(y = .sim, colour = as.factor(.rep)),
    data = sim) +
  labs(title="Google daily closing stock price", y="$US" ) +
  guides(colour = "none")

# forecast(..., bootstrap = TRUE) returns sample-based distributions
# (sample[5000]) rather than normal ones.
fc <- fit %>% forecast(h = 30, bootstrap = TRUE)
fc
## # A fable: 30 x 5 [1]
## # Key:     Symbol, .model [1]
##    Symbol .model         day        Close .mean
##    <chr>  <chr>        <dbl>       <dist> <dbl>
##  1 GOOG   NAIVE(Close)   253 sample[5000]  759.
##  2 GOOG   NAIVE(Close)   254 sample[5000]  759.
##  3 GOOG   NAIVE(Close)   255 sample[5000]  759.
##  4 GOOG   NAIVE(Close)   256 sample[5000]  759.
##  5 GOOG   NAIVE(Close)   257 sample[5000]  759.
##  6 GOOG   NAIVE(Close)   258 sample[5000]  759.
##  7 GOOG   NAIVE(Close)   259 sample[5000]  759.
##  8 GOOG   NAIVE(Close)   260 sample[5000]  759.
##  9 GOOG   NAIVE(Close)   261 sample[5000]  759.
## 10 GOOG   NAIVE(Close)   262 sample[5000]  759.
## # … with 20 more rows
autoplot(fc, google_2015) +
  labs(title="Google daily closing stock price", y="$US" )

# Bootstrapped prediction intervals from 1000 simulated sample paths.
google_2015 %>%
  model(NAIVE(Close)) %>%
  forecast(h = 10, bootstrap = TRUE, times = 1000) %>%
  hilo()
## # A tsibble: 10 x 7 [1]
## # Key:       Symbol, .model [1]
##    Symbol .model         day        Close .mean                  `80%`
##    <chr>  <chr>        <dbl>       <dist> <dbl>                 <hilo>
##  1 GOOG   NAIVE(Close)   253 sample[1000]  759. [747.9560, 769.4825]80
##  2 GOOG   NAIVE(Close)   254 sample[1000]  759. [742.1061, 774.5338]80
##  3 GOOG   NAIVE(Close)   255 sample[1000]  759. [738.9679, 777.7889]80
##  4 GOOG   NAIVE(Close)   256 sample[1000]  759. [735.2750, 781.2471]80
##  5 GOOG   NAIVE(Close)   257 sample[1000]  758. [731.1173, 784.0799]80
##  6 GOOG   NAIVE(Close)   258 sample[1000]  758. [727.7744, 788.5379]80
##  7 GOOG   NAIVE(Close)   259 sample[1000]  758. [726.2142, 793.7744]80
##  8 GOOG   NAIVE(Close)   260 sample[1000]  758. [721.7074, 796.6183]80
##  9 GOOG   NAIVE(Close)   261 sample[1000]  758. [719.4406, 799.8316]80
## 10 GOOG   NAIVE(Close)   262 sample[1000]  758. [717.9221, 802.4509]80
## # … with 1 more variable: `95%` <hilo>

# 5.6 Forecasting using transformations

# Drift model on log(eggs); forecasts are back-transformed automatically.
# point_forecast = lst(mean, median) plots both point forecasts — on the
# back-transformed scale these differ, so showing both is informative.
prices %>%
  filter(!is.na(eggs)) %>%
  model(RW(log(eggs) ~ drift())) %>%
  forecast(h = 50) %>%
  autoplot(prices %>% filter(!is.na(eggs)),
    level = 80, point_forecast = lst(mean, median)
  ) +
  labs(title = "Annual egg prices",
       y = "$US (in cents adjusted for inflation) ")
## Warning: Ignoring unknown aesthetics: linetype

# 5.7 Forecasting with decomposition

us_retail_employment <- us_employment %>%
  filter(year(Month) >= 1990, Title == "Retail Trade")
# Manual approach: extract STL components, then forecast the
# seasonally adjusted series with a naive model.
dcmp <- us_retail_employment %>%
  model(STL(Employed ~ trend(window = 7), robust = TRUE)) %>%
  components() %>%
  select(-.model)
dcmp %>%
  model(NAIVE(season_adjust)) %>%
  forecast() %>%
  autoplot(dcmp) +
  labs(y = "Number of people",
       title = "US retail employment")

# decomposition_model() does the same in one step and also reseasonalises
# the forecasts (seasonal component handled by SNAIVE by default).
fit_dcmp <- us_retail_employment %>%
  model(stlf = decomposition_model(
    STL(Employed ~ trend(window = 7), robust = TRUE),
    NAIVE(season_adjust)
  ))
fit_dcmp %>%
  forecast() %>%
  autoplot(us_retail_employment)+
  labs(y = "Number of people",
       title = "US retail employment")

fit_dcmp %>% gg_tsresiduals()
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).

# 5.8 Evaluating point forecast accuracy

# Four equivalent ways to subset a tsibble for train/test splits:
# filter() on the index, filter_index(), slice(), and grouped slice().
aus_production %>% filter(year(Quarter) >= 1995)
## # A tsibble: 62 x 7 [1Q]
##    Quarter  Beer Tobacco Bricks Cement Electricity   Gas
##      <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
##  1 1995 Q1   426    4714    430   1626       41768   131
##  2 1995 Q2   408    3939    457   1703       43686   167
##  3 1995 Q3   416    6137    417   1733       46022   181
##  4 1995 Q4   520    4739    370   1545       42800   145
##  5 1996 Q1   409    4275    310   1526       43661   133
##  6 1996 Q2   398    5239    358   1593       44707   162
##  7 1996 Q3   398    6293    379   1706       46326   184
##  8 1996 Q4   507    5575    369   1699       43346   146
##  9 1997 Q1   432    4802    330   1511       43938   135
## 10 1997 Q2   398    5523    390   1785       45828   171
## # … with 52 more rows
aus_production %>% filter_index("1995 Q1" ~ .)
## # A tsibble: 62 x 7 [1Q]
##    Quarter  Beer Tobacco Bricks Cement Electricity   Gas
##      <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
##  1 1995 Q1   426    4714    430   1626       41768   131
##  2 1995 Q2   408    3939    457   1703       43686   167
##  3 1995 Q3   416    6137    417   1733       46022   181
##  4 1995 Q4   520    4739    370   1545       42800   145
##  5 1996 Q1   409    4275    310   1526       43661   133
##  6 1996 Q2   398    5239    358   1593       44707   162
##  7 1996 Q3   398    6293    379   1706       46326   184
##  8 1996 Q4   507    5575    369   1699       43346   146
##  9 1997 Q1   432    4802    330   1511       43938   135
## 10 1997 Q2   398    5523    390   1785       45828   171
## # … with 52 more rows
# Note the precedence: n()-19:0 parses as n() - (19:0), i.e. the row
# indices n-19, n-18, ..., n — the last 20 observations.
aus_production %>%
  slice(n()-19:0)
## # A tsibble: 20 x 7 [1Q]
##    Quarter  Beer Tobacco Bricks Cement Electricity   Gas
##      <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
##  1 2005 Q3   408      NA     NA   2340       56043   221
##  2 2005 Q4   482      NA     NA   2265       54992   180
##  3 2006 Q1   438      NA     NA   2027       57112   171
##  4 2006 Q2   386      NA     NA   2278       57157   224
##  5 2006 Q3   405      NA     NA   2427       58400   233
##  6 2006 Q4   491      NA     NA   2451       56249   192
##  7 2007 Q1   427      NA     NA   2140       56244   187
##  8 2007 Q2   383      NA     NA   2362       55036   234
##  9 2007 Q3   394      NA     NA   2536       59806   245
## 10 2007 Q4   473      NA     NA   2562       56411   205
## 11 2008 Q1   420      NA     NA   2183       59118   194
## 12 2008 Q2   390      NA     NA   2558       56660   229
## 13 2008 Q3   410      NA     NA   2612       64067   249
## 14 2008 Q4   488      NA     NA   2373       59045   203
## 15 2009 Q1   415      NA     NA   1963       58368   196
## 16 2009 Q2   398      NA     NA   2160       57471   238
## 17 2009 Q3   419      NA     NA   2325       58394   252
## 18 2009 Q4   488      NA     NA   2273       57336   210
## 19 2010 Q1   414      NA     NA   1904       58309   205
## 20 2010 Q2   374      NA     NA   2401       58041   236
# slice() after group_by() takes the first 12 observations of each series.
aus_retail %>%
  group_by(State, Industry) %>%
  slice(1:12)
## # A tsibble: 1,824 x 5 [1M]
## # Key:       State, Industry [152]
## # Groups:    State, Industry [152]
##    State                        Industry           `Series ID`    Month Turnover
##    <chr>                        <chr>              <chr>          <mth>    <dbl>
##  1 Australian Capital Territory Cafes, restaurant… A3349849A   1982 Apr      4.4
##  2 Australian Capital Territory Cafes, restaurant… A3349849A   1982 May      3.4
##  3 Australian Capital Territory Cafes, restaurant… A3349849A   1982 Jun      3.6
##  4 Australian Capital Territory Cafes, restaurant… A3349849A   1982 Jul      4  
##  5 Australian Capital Territory Cafes, restaurant… A3349849A   1982 Aug      3.6
##  6 Australian Capital Territory Cafes, restaurant… A3349849A   1982 Sep      4.2
##  7 Australian Capital Territory Cafes, restaurant… A3349849A   1982 Oct      4.8
##  8 Australian Capital Territory Cafes, restaurant… A3349849A   1982 Nov      5.4
##  9 Australian Capital Territory Cafes, restaurant… A3349849A   1982 Dec      6.9
## 10 Australian Capital Territory Cafes, restaurant… A3349849A   1983 Jan      3.8
## # … with 1,814 more rows
# Train on 1992-2007, then evaluate 10-quarter forecasts against the
# held-out 2008-2010 observations in recent_production.
recent_production <- aus_production %>%
  filter(year(Quarter) >= 1992)
beer_train <- recent_production %>%
  filter(year(Quarter) <= 2007)

# Four benchmark methods fitted to the training set.
beer_fit <- beer_train %>%
  model(
    Mean = MEAN(Beer),
    `Naïve` = NAIVE(Beer),
    `Seasonal naïve` = SNAIVE(Beer),
    Drift = RW(Beer ~ drift())
  )

beer_fc <- beer_fit %>%
  forecast(h = 10)

beer_fc %>%
  autoplot(
    aus_production %>% filter(year(Quarter) >= 1992),
    level = NULL
  ) +
  labs(
    y = "Megalitres",
    title = "Forecasts for quarterly beer production"
  ) +
  guides(colour = guide_legend(title = "Forecast"))

# accuracy(fable, data) computes test-set accuracy on the observations
# not used for training; seasonal naive wins here on every measure.
accuracy(beer_fc, recent_production)
## # A tibble: 4 × 10
##   .model         .type    ME  RMSE   MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>          <chr> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 Drift          Test  -54.0  64.9  58.9 -13.6  14.6  4.12  3.87  -0.0741
## 2 Mean           Test  -13.8  38.4  34.8  -3.97  8.28 2.44  2.29  -0.0691
## 3 Naïve          Test  -51.4  62.7  57.4 -13.0  14.2  4.01  3.74  -0.0691
## 4 Seasonal naïve Test    5.2  14.3  13.4   1.15  3.17 0.937 0.853  0.132
# Same exercise for the (non-seasonal) Google stock: train on 2015,
# forecast over the Jan 2016 trading days supplied as new data.
google_fit <- google_2015 %>%
  model(
    Mean = MEAN(Close),
    `Naïve` = NAIVE(Close),
    Drift = RW(Close ~ drift())
  )

google_fc <- google_fit %>%
  forecast(google_jan_2016)
google_fc %>%
  autoplot(bind_rows(google_2015, google_jan_2016),
    level = NULL) +
  labs(y = "$US",
       title = "Google closing stock prices from Jan 2015") +
  guides(colour = guide_legend(title = "Forecast"))

accuracy(google_fc, google_stock)
## # A tibble: 3 × 11
##   .model Symbol .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift  GOOG   Test  -49.8  53.1  49.8 -6.99  6.99  6.99  4.74 0.604
## 2 Mean   GOOG   Test  117.  118.  117.  16.2  16.2  16.4  10.5  0.496
## 3 Naïve  GOOG   Test  -40.4  43.4  40.4 -5.67  5.67  5.67  3.88 0.496

# 5.9 Evaluating distributional forecast accuracy

google_fc %>%
  filter(.model == "Naïve") %>%
  autoplot(bind_rows(google_2015, google_jan_2016), level=80)+
  labs(y = "$US",
       title = "Google closing stock prices")

# Quantile score for the 10% quantile of the first test-day forecast.
google_fc %>%
  filter(.model == "Naïve", Date == "2016-01-04") %>%
  accuracy(google_stock, list(qs=quantile_score), probs=0.10)
## # A tibble: 1 × 4
##   .model Symbol .type    qs
##   <chr>  <chr>  <chr> <dbl>
## 1 Naïve  GOOG   Test   4.86
# Winkler score evaluates the 80% prediction interval.
google_fc %>%
  filter(.model == "Naïve", Date == "2016-01-04") %>%
  accuracy(google_stock,
    list(winkler = winkler_score), level = 80)
## # A tibble: 1 × 4
##   .model Symbol .type winkler
##   <chr>  <chr>  <chr>   <dbl>
## 1 Naïve  GOOG   Test     55.7
# CRPS averages quantile scores over the whole distribution.
google_fc %>%
  accuracy(google_stock, list(crps = CRPS))
## # A tibble: 3 × 4
##   .model Symbol .type  crps
##   <chr>  <chr>  <chr> <dbl>
## 1 Drift  GOOG   Test   33.5
## 2 Mean   GOOG   Test   76.7
## 3 Naïve  GOOG   Test   26.5
# skill_score() scales CRPS against a benchmark; naive is the
# benchmark here, hence its skill of exactly 0.
google_fc %>%
  accuracy(google_stock, list(skill = skill_score(CRPS)))
## # A tibble: 3 × 4
##   .model Symbol .type  skill
##   <chr>  <chr>  <chr>  <dbl>
## 1 Drift  GOOG   Test  -0.266
## 2 Mean   GOOG   Test  -1.90 
## 3 Naïve  GOOG   Test   0

# 5.10 Time series cross-validation

# Time series cross-validation accuracy
# stretch_tsibble() builds expanding training windows (first window has
# 3 observations, growing by 1), keyed by .id.
google_2015_tr <- google_2015 %>%
  stretch_tsibble(.init = 3, .step = 1) %>%
  relocate(Date, Symbol, .id)
google_2015_tr
## # A tsibble: 31,875 x 10 [1]
## # Key:       Symbol, .id [250]
##    Date       Symbol   .id  Open  High   Low Close Adj_Close  Volume   day
##    <date>     <chr>  <int> <dbl> <dbl> <dbl> <dbl>     <dbl>   <dbl> <int>
##  1 2015-01-02 GOOG       1  526.  528.  521.  522.      522. 1447600     1
##  2 2015-01-05 GOOG       1  520.  521.  510.  511.      511. 2059800     2
##  3 2015-01-06 GOOG       1  512.  513.  498.  499.      499. 2899900     3
##  4 2015-01-02 GOOG       2  526.  528.  521.  522.      522. 1447600     1
##  5 2015-01-05 GOOG       2  520.  521.  510.  511.      511. 2059800     2
##  6 2015-01-06 GOOG       2  512.  513.  498.  499.      499. 2899900     3
##  7 2015-01-07 GOOG       2  504.  504.  497.  498.      498. 2065100     4
##  8 2015-01-02 GOOG       3  526.  528.  521.  522.      522. 1447600     1
##  9 2015-01-05 GOOG       3  520.  521.  510.  511.      511. 2059800     2
## 10 2015-01-06 GOOG       3  512.  513.  498.  499.      499. 2899900     3
## # … with 31,865 more rows
# TSCV accuracy
# One-step-ahead drift forecast from every training window, scored
# against the full series.
google_2015_tr %>%
  model(RW(Close ~ drift())) %>%
  forecast(h = 1) %>%
  accuracy(google_2015)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 253
## # A tibble: 1 × 11
##   .model           Symbol .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>            <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 RW(Close ~ drif… GOOG   Test  0.726  11.3  7.26 0.112  1.19  1.02  1.01 0.0985
# Training set accuracy
# For comparison: in-sample (residual) accuracy, which is optimistic
# relative to the CV figures above.
google_2015 %>%
  model(RW(Close ~ drift())) %>%
  accuracy()
## # A tibble: 1 × 11
##   Symbol .model     .type        ME  RMSE   MAE     MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr>      <chr>     <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 GOOG   RW(Close … Trai… -3.04e-14  11.1  7.16 -0.0267  1.18  1.00 0.996 0.0976
# CV over horizons 1-8: label each forecast with its step h, rebuild the
# fable, then compute accuracy per horizon and plot RMSE against h.
google_2015_tr <- google_2015 %>%
  stretch_tsibble(.init = 3, .step = 1)
fc <- google_2015_tr %>%
  model(RW(Close ~ drift())) %>%
  forecast(h = 8) %>%
  group_by(.id) %>%
  mutate(h = row_number()) %>%
  ungroup() %>%
  as_fable(response = "Close", distribution = Close)
fc %>%
  accuracy(google_2015, by = c("h", ".model")) %>%
  ggplot(aes(x = h, y = RMSE)) +
  geom_point()
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 8 observations are missing between 253 and 260

# 5.11 Exercises

# Extract data of interest
recent_production <- aus_production %>%
  filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production %>% model(SNAIVE(Beer))
# Look at the residuals
# (first 4 residuals are NA — SNAIVE needs one full seasonal year)
fit %>% gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).

# Look a some forecasts
fit %>% forecast() %>% autoplot(recent_production)

# myseries is a single retail series selected earlier in the document;
# hold out everything from 2011 onwards as the test set.
myseries_train <- myseries %>%
  filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

fit <- myseries_train %>%
  model(SNAIVE(Turnover))
fit %>% gg_tsresiduals()
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).

# anti_join with no `by` matches on all common columns (hence the
# "Joining, by = ..." message), leaving exactly the held-out rows.
fc <- fit %>%
  forecast(new_data = anti_join(myseries, myseries_train))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
fc %>% autoplot(myseries)

fit %>% accuracy()
## # A tibble: 1 × 12
##   State    Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… SNAIV… Trai… 0.439  1.21 0.915  5.23  12.4     1     1 0.768
fc %>% accuracy(myseries)
## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test  0.836  1.55  1.24  5.94  9.06  1.36  1.28 0.601

# 7.1 The linear model

us_change %>%
  pivot_longer(c(Consumption, Income), names_to="Series") %>%
  autoplot(value) +
  labs(y = "% change")

us_change %>%
  ggplot(aes(x = Income, y = Consumption)) +
  labs(y = "Consumption (quarterly % change)",
       x = "Income (quarterly % change)") +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

# Simple regression of consumption on income via TSLM.
us_change %>%
  model(TSLM(Consumption ~ Income)) %>%
  report()
## Series: Consumption 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.58236 -0.27777  0.01862  0.32330  1.42229 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.54454    0.05403  10.079  < 2e-16 ***
## Income       0.27183    0.04673   5.817  2.4e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5905 on 196 degrees of freedom
## Multiple R-squared: 0.1472,  Adjusted R-squared: 0.1429
## F-statistic: 33.84 on 1 and 196 DF, p-value: 2.4022e-08
# Plot the remaining candidate predictors in separate facets.
us_change %>%
  select(-Consumption, -Income) %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(Quarter, value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y") +
  guides(colour = "none") +
  labs(y="% change")

# Scatterplot matrix of all five variables (columns 2:6 of us_change).
us_change %>%
  GGally::ggpairs(columns = 2:6)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

# 7.2 Least squares estimation

# Multiple regression: consumption on four predictors.
fit_consMR <- us_change %>%
  model(tslm = TSLM(Consumption ~ Income + Production +
                                    Unemployment + Savings))
report(fit_consMR)
## Series: Consumption 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.90555 -0.15821 -0.03608  0.13618  1.15471 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.253105   0.034470   7.343 5.71e-12 ***
## Income        0.740583   0.040115  18.461  < 2e-16 ***
## Production    0.047173   0.023142   2.038   0.0429 *  
## Unemployment -0.174685   0.095511  -1.829   0.0689 .  
## Savings      -0.052890   0.002924 -18.088  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3102 on 193 degrees of freedom
## Multiple R-squared: 0.7683,  Adjusted R-squared: 0.7635
## F-statistic:   160 on 4 and 193 DF, p-value: < 2.22e-16
# Actual vs fitted over time...
augment(fit_consMR) %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Consumption, colour = "Data")) +
  geom_line(aes(y = .fitted, colour = "Fitted")) +
  labs(y = NULL,
    title = "Percent change in US consumption expenditure"
  ) +
  scale_colour_manual(values=c(Data="black",Fitted="#D55E00")) +
  guides(colour = guide_legend(title = NULL))

# ...and as a scatterplot against the 45-degree line.
augment(fit_consMR) %>%
  ggplot(aes(x = Consumption, y = .fitted)) +
  geom_point() +
  labs(
    y = "Fitted (predicted values)",
    x = "Data (actual values)",
    title = "Percent change in US consumption expenditure"
  ) +
  geom_abline(intercept = 0, slope = 1)

# 7.3 Evaluating the regression model

fit_consMR %>% gg_tsresiduals()

# dof = 5 matches the five estimated coefficients (intercept + 4 slopes).
augment(fit_consMR) %>%
  features(.innov, ljung_box, lag = 10, dof = 5)
## # A tibble: 1 × 3
##   .model lb_stat lb_pvalue
##   <chr>    <dbl>     <dbl>
## 1 tslm      18.9   0.00204
# Residuals plotted against each predictor to check for remaining pattern.
us_change %>%
  left_join(residuals(fit_consMR), by = "Quarter") %>%
  pivot_longer(Income:Unemployment,
               names_to = "regressor", values_to = "x") %>%
  ggplot(aes(x = x, y = .resid)) +
  geom_point() +
  facet_wrap(. ~ regressor, scales = "free_x") +
  labs(y = "Residuals", x = "")

augment(fit_consMR) %>%
  ggplot(aes(x = .fitted, y = .resid)) +
  geom_point() + labs(x = "Fitted", y = "Residuals")

# Air passengers regressed on Guinea rice production — two unrelated
# trending series joined by Year; illustrates spurious regression
# (high R-squared despite no causal link).
fit <- aus_airpassengers %>%
  filter(Year <= 2011) %>%
  left_join(guinea_rice, by = "Year") %>%
  model(TSLM(Passengers ~ Production))
report(fit)
## Series: Passengers 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.9448 -1.8917 -0.3272  1.8620 10.4210 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -7.493      1.203  -6.229 2.25e-07 ***
## Production    40.288      1.337  30.135  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.239 on 40 degrees of freedom
## Multiple R-squared: 0.9578,  Adjusted R-squared: 0.9568
## F-statistic: 908.1 on 1 and 40 DF, p-value: < 2.22e-16
fit %>% gg_tsresiduals()

# 7.4 Some useful predictors

recent_production <- aus_production %>%
  filter(year(Quarter) >= 1992)
recent_production %>%
  autoplot(Beer) +
  labs(y = "Megalitres",
       title = "Australian quarterly beer production")

# trend() and season() are special TSLM predictors: a linear time trend
# plus quarterly seasonal dummies (Q1 is the baseline).
fit_beer <- recent_production %>%
  model(TSLM(Beer ~ trend() + season()))
report(fit_beer)
## Series: Beer 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -42.9029  -7.5995  -0.4594   7.9908  21.7895 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   441.80044    3.73353 118.333  < 2e-16 ***
## trend()        -0.34027    0.06657  -5.111 2.73e-06 ***
## season()year2 -34.65973    3.96832  -8.734 9.10e-13 ***
## season()year3 -17.82164    4.02249  -4.430 3.45e-05 ***
## season()year4  72.79641    4.02305  18.095  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.23 on 69 degrees of freedom
## Multiple R-squared: 0.9243,  Adjusted R-squared: 0.9199
## F-statistic: 210.7 on 4 and 69 DF, p-value: < 2.22e-16
augment(fit_beer) %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Beer, colour = "Data")) +
  geom_line(aes(y = .fitted, colour = "Fitted")) +
  scale_colour_manual(
    values = c(Data = "black", Fitted = "#D55E00")
  ) +
  labs(y = "Megalitres",
       title = "Australian quarterly beer production") +
  guides(colour = guide_legend(title = "Series"))

augment(fit_beer) %>%
  ggplot(aes(x = Beer, y = .fitted,
             colour = factor(quarter(Quarter)))) +
  geom_point() +
  labs(y = "Fitted", x = "Actual values",
       title = "Australian quarterly beer production") +
  geom_abline(intercept = 0, slope = 1) +
  guides(colour = guide_legend(title = "Quarter"))

# Fourier terms with K = 2 give the same fit as the seasonal dummies for
# quarterly data — note the identical residuals and R-squared above/below.
fourier_beer <- recent_production %>%
  model(TSLM(Beer ~ trend() + fourier(K = 2)))
report(fourier_beer)
## Series: Beer 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -42.9029  -7.5995  -0.4594   7.9908  21.7895 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        446.87920    2.87321 155.533  < 2e-16 ***
## trend()             -0.34027    0.06657  -5.111 2.73e-06 ***
## fourier(K = 2)C1_4   8.91082    2.01125   4.430 3.45e-05 ***
## fourier(K = 2)S1_4 -53.72807    2.01125 -26.714  < 2e-16 ***
## fourier(K = 2)C2_4 -13.98958    1.42256  -9.834 9.26e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.23 on 69 degrees of freedom
## Multiple R-squared: 0.9243,  Adjusted R-squared: 0.9199
## F-statistic: 210.7 on 4 and 69 DF, p-value: < 2.22e-16

# 7.5 Selecting predictors

# Model-selection statistics for the multiple regression fitted in 7.2.
glance(fit_consMR) %>%
  select(adj_r_squared, CV, AIC, AICc, BIC)
## # A tibble: 1 × 5
##   adj_r_squared    CV   AIC  AICc   BIC
##           <dbl> <dbl> <dbl> <dbl> <dbl>
## 1         0.763 0.104 -457. -456. -437.

# 7.6 Forecasting with regression

recent_production <- aus_production %>%
  filter(year(Quarter) >= 1992)
fit_beer <- recent_production %>%
  model(TSLM(Beer ~ trend() + season()))
# trend()/season() are deterministic, so forecast() needs no new data.
fc_beer <- forecast(fit_beer)
fc_beer %>%
  autoplot(recent_production) +
  labs(
    title = "Forecasts of beer production using regression",
    y = "megalitres"
  )

# Scenario-based forecasting: supply assumed predictor values for the
# next 4 quarters under two scenarios.
fit_consBest <- us_change %>%
  model(
    lm = TSLM(Consumption ~ Income + Savings + Unemployment)
  )
future_scenarios <- scenarios(
  Increase = new_data(us_change, 4) %>%
    mutate(Income=1, Savings=0.5, Unemployment=0),
  Decrease = new_data(us_change, 4) %>%
    mutate(Income=-1, Savings=-0.5, Unemployment=0),
  names_to = "Scenario")

fc <- forecast(fit_consBest, new_data = future_scenarios)
us_change %>%
  autoplot(Consumption) +
  autolayer(fc) +
  labs(title = "US consumption", y = "% change")

# Average vs extreme income-increase scenarios for the simple model.
fit_cons <- us_change %>%
  model(TSLM(Consumption ~ Income))
new_cons <- scenarios(
  "Average increase" = new_data(us_change, 4) %>%
    mutate(Income = mean(us_change$Income)),
  "Extreme increase" = new_data(us_change, 4) %>%
    mutate(Income = 12),
  names_to = "Scenario"
)
fcast <- forecast(fit_cons, new_cons)

us_change %>%
  autoplot(Consumption) +
  autolayer(fcast) +
  labs(title = "US consumption", y = "% change")

# 7.7 Nonlinear regression

# Winning times converted from a difftime to numeric minutes.
boston_men <- boston_marathon %>%
  filter(Year >= 1924) %>%
  filter(Event == "Men's open division") %>%
  mutate(Minutes = as.numeric(Time)/60)
# Three trend specifications: linear, exponential (log response), and
# piecewise linear with knots at 1950 and 1980.
fit_trends <- boston_men %>%
  model(
    linear = TSLM(Minutes ~ trend()),
    exponential = TSLM(log(Minutes) ~ trend()),
    piecewise = TSLM(Minutes ~ trend(knots = c(1950, 1980)))
  )
fc_trends <- fit_trends %>% forecast(h = 10)

boston_men %>%
  autoplot(Minutes) +
  geom_line(data = fitted(fit_trends),
            aes(y = .fitted, colour = .model)) +
  autolayer(fc_trends, alpha = 0.5, level = 95) +
  labs(y = "Minutes",
       title = "Boston marathon winning times")

# 7.10 Exercises

# Aggregate half-hourly Jan 2014 electricity data to daily totals
# (demand) and daily maxima (temperature).
jan14_vic_elec <- vic_elec %>%
  filter(yearmonth(Time) == yearmonth("2014 Jan")) %>%
  index_by(Date = as_date(Time)) %>%
  summarise(
    Demand = sum(Demand),
    Temperature = max(Temperature)
  )
# Forecast one day ahead assuming a maximum temperature of 15C.
jan14_vic_elec %>%
  model(TSLM(Demand ~ Temperature)) %>%
  forecast(
    new_data(jan14_vic_elec, 1) %>%
      mutate(Temperature = 15)
  ) %>%
  autoplot(jan14_vic_elec)

# 8.1 Simple exponential smoothing

algeria_economy <- global_economy %>%
  filter(Country == "Algeria")
algeria_economy %>%
  autoplot(Exports) +
  labs(y = "% of GDP", title = "Exports: Algeria")

# Estimate parameters
# ETS(A,N,N) = simple exponential smoothing with additive errors.
fit <- algeria_economy %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))
fc <- fit %>%
  forecast(h = 5)
fc %>%
  autoplot(algeria_economy) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit)) +
  labs(y="% of GDP", title="Exports: Algeria") +
  guides(colour = "none")

# 8.2 Methods with trend

aus_economy <- global_economy %>%
  filter(Code == "AUS") %>%
  mutate(Pop = Population / 1e6)
autoplot(aus_economy, Pop) +
  labs(y = "Millions", title = "Australian population")

# Holt's linear trend: ETS(A,A,N).
fit <- aus_economy %>%
  model(
    AAN = ETS(Pop ~ error("A") + trend("A") + season("N"))
  )
fc <- fit %>% forecast(h = 10)
# Compare Holt's method against damped trend (phi fixed at 0.9).
aus_economy %>%
  model(
    `Holt's method` = ETS(Pop ~ error("A") +
                       trend("A") + season("N")),
    `Damped Holt's method` = ETS(Pop ~ error("A") +
                       trend("Ad", phi = 0.9) + season("N"))
  ) %>%
  forecast(h = 15) %>%
  autoplot(aus_economy, level = NULL) +
  labs(title = "Australian population",
       y = "Millions") +
  guides(colour = guide_legend(title = "Forecast"))

www_usage <- as_tsibble(WWWusage)
www_usage %>% autoplot(value) +
  labs(x="Minute", y="Number of users",
       title = "Internet usage per minute")

# One-step time-series CV to compare SES, Holt, and damped Holt;
# damped achieves the lowest RMSE/MAE below.
www_usage %>%
  stretch_tsibble(.init = 10) %>%
  model(
    SES = ETS(value ~ error("A") + trend("N") + season("N")),
    Holt = ETS(value ~ error("A") + trend("A") + season("N")),
    Damped = ETS(value ~ error("A") + trend("Ad") +
                   season("N"))
  ) %>%
  forecast(h = 1) %>%
  accuracy(www_usage)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 101
## # A tibble: 3 × 10
##   .model .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Damped Test  0.288   3.69  3.00 0.347  2.26 0.663 0.636 0.336
## 2 Holt   Test  0.0610  3.87  3.17 0.244  2.38 0.701 0.668 0.296
## 3 SES    Test  1.46    6.05  4.81 0.904  3.55 1.06  1.04  0.803
# Refit the winning (damped) model to the full series.
fit <- www_usage %>%
  model(
    Damped = ETS(value ~ error("A") + trend("Ad") +
                   season("N"))
  )
# Estimated parameters:
tidy(fit)
## # A tibble: 5 × 3
##   .model term  estimate
##   <chr>  <chr>    <dbl>
## 1 Damped alpha   1.00  
## 2 Damped beta    0.997 
## 3 Damped phi     0.815 
## 4 Damped l[0]   90.4   
## 5 Damped b[0]   -0.0173
fit %>%
  forecast(h = 10) %>%
  autoplot(www_usage) +
  labs(x="Minute", y="Number of users",
       title = "Internet usage per minute")

# 8.3 Methods with seasonality

# Total Australian holiday trips, in millions.
aus_holidays <- tourism %>%
  filter(Purpose == "Holiday") %>%
  summarise(Trips = sum(Trips)/1e3)
# Holt-Winters with additive vs multiplicative seasonality.
fit <- aus_holidays %>%
  model(
    additive = ETS(Trips ~ error("A") + trend("A") +
                                                season("A")),
    multiplicative = ETS(Trips ~ error("M") + trend("A") +
                                                season("M"))
  )
fc <- fit %>% forecast(h = "3 years")
fc %>%
  autoplot(aus_holidays, level = NULL) +
  labs(title="Australian domestic tourism",
       y="Overnight trips (millions)") +
  guides(colour = guide_legend(title = "Forecast"))

# Hourly pedestrian counts aggregated to daily totals (in thousands),
# then forecast with a damped multiplicative Holt-Winters model.
sth_cross_ped <- pedestrian %>%
  filter(Date >= "2016-07-01",
         Sensor == "Southern Cross Station") %>%
  index_by(Date) %>%
  summarise(Count = sum(Count)/1000)
sth_cross_ped %>%
  filter(Date <= "2016-07-31") %>%
  model(
    hw = ETS(Count ~ error("M") + trend("Ad") + season("M"))
  ) %>%
  forecast(h = "2 weeks") %>%
  autoplot(sth_cross_ped %>% filter(Date <= "2016-08-14")) +
  labs(title = "Daily traffic: Southern Cross",
       y="Pedestrians ('000)")

# 8.6 Estimation and model selection

aus_holidays <- tourism %>%
  filter(Purpose == "Holiday") %>%
  summarise(Trips = sum(Trips)/1e3)
# ETS() with no specification selects the model automatically
# (ETS(M,N,A) chosen here, per the report below).
fit <- aus_holidays %>%
  model(ETS(Trips))
report(fit)
## Series: Trips 
## Model: ETS(M,N,A) 
##   Smoothing parameters:
##     alpha = 0.3484054 
##     gamma = 0.0001000018 
## 
##   Initial states:
##      l[0]       s[0]      s[-1]      s[-2]    s[-3]
##  9.727072 -0.5376106 -0.6884343 -0.2933663 1.519411
## 
##   sigma^2:  0.0022
## 
##      AIC     AICc      BIC 
## 226.2289 227.7845 242.9031
components(fit) %>%
  autoplot() +
  labs(title = "ETS(M,N,A) components")
## Warning: Removed 4 row(s) containing missing values (geom_path).

# 8.7 Forecasting with ETS models

# Forecast eight quarters ahead from the selected ETS model and plot
# the forecasts over the observed series.
holiday_fc <- forecast(fit, h = 8)
autoplot(holiday_fc, aus_holidays) +
  labs(title = "Australian domestic tourism",
       y = "Overnight trips (millions)")

# 9.1 Stationarity and differencing

# Google closing prices for 2015. Trading days are irregular, hence
# the warnings below; ACF is computed by observation instead.
google_2015 <- gafa_stock %>%
  filter(Symbol == "GOOG", year(Date) == 2015) 
# ACF of the raw prices: slow decay, indicating non-stationarity.
google_2015 %>% ACF(Close) %>% 
  autoplot() + labs(subtitle = "Google closing stock price")
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.

# ACF of the daily changes: looks like white noise.
google_2015 %>% ACF(difference(Close)) %>% 
  autoplot() + labs(subtitle = "Changes in Google closing stock price")
## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.

# Ljung-Box test on the differenced series: large p-value, so the
# changes are indistinguishable from white noise.
google_2015 %>%
  mutate(diff_close = difference(Close)) %>%
  features(diff_close, ljung_box, lag = 10)
## # A tibble: 1 × 3
##   Symbol lb_stat lb_pvalue
##   <chr>    <dbl>     <dbl>
## 1 GOOG      7.91     0.637
# Corticosteroid (H02) sales: log transform, a seasonal (lag-12)
# difference, then a further first difference, shown as four panels.
PBS %>%
  filter(ATC2 == "H02") %>%
  summarise(Cost = sum(Cost)/1e6) %>%
  transmute(
    `Sales ($million)` = Cost,
    `Log sales` = log(Cost),
    `Annual change in log sales` = difference(log(Cost), 12),
    `Doubly differenced log sales` =
                     difference(difference(log(Cost), 12), 1)
  ) %>%
  pivot_longer(-Month, names_to="Type", values_to="Sales") %>%
  mutate(
    # Fix the facet order to match the transformation sequence.
    Type = factor(Type, levels = c(
      "Sales ($million)",
      "Log sales",
      "Annual change in log sales",
      "Doubly differenced log sales"))
  ) %>%
  ggplot(aes(x = Month, y = Sales)) +
  geom_line() +
  facet_grid(vars(Type), scales = "free_y") +
  labs(title = "Corticosteroid drug sales", y = NULL)

# KPSS test on the raw prices: p-value 0.01 (reported floor), so the
# null of stationarity is rejected.
google_2015 %>%
  features(Close, unitroot_kpss)
## # A tibble: 1 × 3
##   Symbol kpss_stat kpss_pvalue
##   <chr>      <dbl>       <dbl>
## 1 GOOG        3.56        0.01
# KPSS on the differenced series: p-value at the 0.1 ceiling, so
# differencing once appears sufficient.
google_2015 %>%
  mutate(diff_close = difference(Close)) %>%
  features(diff_close, unitroot_kpss)
## # A tibble: 1 × 3
##   Symbol kpss_stat kpss_pvalue
##   <chr>      <dbl>       <dbl>
## 1 GOOG      0.0989         0.1
# unitroot_ndiffs automates the choice: one difference needed.
google_2015 %>%
  features(Close, unitroot_ndiffs)
## # A tibble: 1 × 2
##   Symbol ndiffs
##   <chr>   <int>
## 1 GOOG        1
# Monthly retail turnover: one seasonal difference is suggested...
aus_total_retail <- aus_retail %>%
  summarise(Turnover = sum(Turnover))
aus_total_retail %>%
  mutate(log_turnover = log(Turnover)) %>%
  features(log_turnover, unitroot_nsdiffs)
## # A tibble: 1 × 1
##   nsdiffs
##     <int>
## 1       1
# ...and, after seasonally differencing, one further first difference.
aus_total_retail %>%
  mutate(log_turnover = difference(log(Turnover), 12)) %>%
  features(log_turnover, unitroot_ndiffs)
## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      1

# 9.5 Non-seasonal ARIMA models

# Egyptian exports (% of GDP): plot the annual series.
global_economy %>%
  filter(Code == "EGY") %>%
  autoplot(Exports) +
  labs(y = "% of GDP", title = "Egyptian exports")

# Automatic ARIMA selection picks ARIMA(2,0,1) with a mean.
fit <- global_economy %>%
  filter(Code == "EGY") %>%
  model(ARIMA(Exports))
report(fit)
## Series: Exports 
## Model: ARIMA(2,0,1) w/ mean 
## 
## Coefficients:
##          ar1      ar2      ma1  constant
##       1.6764  -0.8034  -0.6896    2.5623
## s.e.  0.1111   0.0928   0.1492    0.1161
## 
## sigma^2 estimated as 8.046:  log likelihood=-141.57
## AIC=293.13   AICc=294.29   BIC=303.43
fit %>% forecast(h=10) %>%
  autoplot(global_economy) +
  labs(y = "% of GDP", title = "Egyptian exports")

# ACF and PACF of the series, used to motivate a manual order choice.
global_economy %>%
  filter(Code == "EGY") %>%
  ACF(Exports) %>%
  autoplot()

global_economy %>%
  filter(Code == "EGY") %>%
  PACF(Exports) %>%
  autoplot()

# Manually specified AR(4) for comparison; its AICc (294.7) is close
# to the automatic model's (294.29).
fit2 <- global_economy %>%
  filter(Code == "EGY") %>%
  model(ARIMA(Exports ~ pdq(4,0,0)))
report(fit2)
## Series: Exports 
## Model: ARIMA(4,0,0) w/ mean 
## 
## Coefficients:
##          ar1      ar2     ar3      ar4  constant
##       0.9861  -0.1715  0.1807  -0.3283    6.6922
## s.e.  0.1247   0.1865  0.1865   0.1273    0.3562
## 
## sigma^2 estimated as 7.885:  log likelihood=-140.53
## AIC=293.05   AICc=294.7   BIC=305.41

# 9.7 ARIMA modelling in fable

# Central African Republic exports: full ARIMA modelling workflow.
global_economy %>%
  filter(Code == "CAF") %>%
  autoplot(Exports) +
  labs(title="Central African Republic exports",
       y="% of GDP")

# Time plot, ACF and PACF of the first-differenced series.
global_economy %>%
  filter(Code == "CAF") %>%
  gg_tsdisplay(difference(Exports), plot_type='partial')
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

# Two hand-picked candidates, the default stepwise search, and a
# fuller (non-stepwise) search.
caf_fit <- global_economy %>%
  filter(Code == "CAF") %>%
  model(arima210 = ARIMA(Exports ~ pdq(2,1,0)),
        arima013 = ARIMA(Exports ~ pdq(0,1,3)),
        stepwise = ARIMA(Exports),
        search = ARIMA(Exports, stepwise=FALSE))

caf_fit %>% pivot_longer(!Country, names_to = "Model name",
                         values_to = "Orders")
## # A mable: 4 x 3
## # Key:     Country, Model name [4]
##   Country                  `Model name`         Orders
##   <fct>                    <chr>               <model>
## 1 Central African Republic arima210     <ARIMA(2,1,0)>
## 2 Central African Republic arima013     <ARIMA(0,1,3)>
## 3 Central African Republic stepwise     <ARIMA(2,1,2)>
## 4 Central African Republic search       <ARIMA(3,1,0)>
# Compare the four fits by information criteria.
glance(caf_fit) %>% arrange(AICc) %>% select(.model:BIC)
## # A tibble: 4 × 6
##   .model   sigma2 log_lik   AIC  AICc   BIC
##   <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 search     6.52   -133.  274.  275.  282.
## 2 arima210   6.71   -134.  275.  275.  281.
## 3 arima013   6.54   -133.  274.  275.  282.
## 4 stepwise   6.42   -132.  274.  275.  284.
# Residual diagnostics for the preferred "search" model.
caf_fit %>%
  select(search) %>%
  gg_tsresiduals()

# dof = 3 because the search model is ARIMA(3,1,0): three AR terms.
augment(caf_fit) %>%
  filter(.model=='search') %>%
  features(.innov, ljung_box, lag = 10, dof = 3)
## # A tibble: 1 × 4
##   Country                  .model lb_stat lb_pvalue
##   <fct>                    <chr>    <dbl>     <dbl>
## 1 Central African Republic search    5.75     0.569
caf_fit %>%
  forecast(h=5) %>%
  filter(.model=='search') %>%
  autoplot(global_economy)

# Plot the AR characteristic roots of the search model.
gg_arma(caf_fit %>% select(Country, search))

# 9.9 Seasonal ARIMA models

# US leisure & hospitality employment since 2001, in millions.
leisure <- us_employment %>%
  filter(Title == "Leisure and Hospitality",
         year(Month) > 2000) %>%
  mutate(Employed = Employed/1000) %>%
  select(Month, Employed)
autoplot(leisure, Employed) +
  labs(title = "US employment: leisure and hospitality",
       y="Number of people (millions)")

# Seasonal (lag-12) difference only: still trending, so not stationary.
leisure %>%
  gg_tsdisplay(difference(Employed, 12),
               plot_type='partial', lag=36) +
  labs(title="Seasonally differenced", y="")
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).

# Add a first difference on top of the seasonal difference.
leisure %>%
  gg_tsdisplay(difference(Employed, 12) %>% difference(),
               plot_type='partial', lag=36) +
  labs(title = "Double differenced", y="")
## Warning: Removed 13 row(s) containing missing values (geom_path).
## Warning: Removed 13 rows containing missing values (geom_point).

# Two hand-specified seasonal ARIMA models plus a full automatic
# search (no stepwise shortcut, no approximation).
fit <- leisure %>%
  model(
    arima012011 = ARIMA(Employed ~ pdq(0,1,2) + PDQ(0,1,1)),
    arima210011 = ARIMA(Employed ~ pdq(2,1,0) + PDQ(0,1,1)),
    auto = ARIMA(Employed, stepwise = FALSE, approx = FALSE)
  )
fit %>% pivot_longer(everything(), names_to = "Model name",
                     values_to = "Orders")
## # A mable: 3 x 2
## # Key:     Model name [3]
##   `Model name`                    Orders
##   <chr>                          <model>
## 1 arima012011  <ARIMA(0,1,2)(0,1,1)[12]>
## 2 arima210011  <ARIMA(2,1,0)(0,1,1)[12]>
## 3 auto         <ARIMA(2,1,0)(1,1,1)[12]>
glance(fit) %>% arrange(AICc) %>% select(.model:BIC)
## # A tibble: 3 × 6
##   .model       sigma2 log_lik   AIC  AICc   BIC
##   <chr>         <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 auto        0.00142    395. -780. -780. -763.
## 2 arima210011 0.00145    392. -776. -776. -763.
## 3 arima012011 0.00146    391. -775. -775. -761.
fit %>% select(auto) %>% gg_tsresiduals(lag=36)

# dof = 4: the auto model ARIMA(2,1,0)(1,1,1)[12] has 4 coefficients.
augment(fit) %>%
  filter(.model == "auto") %>%
  features(.innov, ljung_box, lag=24, dof=4)
## # A tibble: 1 × 3
##   .model lb_stat lb_pvalue
##   <chr>    <dbl>     <dbl>
## 1 auto      16.6     0.680
# Three years of forecasts from the automatically selected model.
forecast(fit, h=36) %>%
  filter(.model=='auto') %>%
  autoplot(leisure) +
  labs(title = "US employment: leisure and hospitality",
       y="Number of people (millions)")

# Monthly corticosteroid (H02) sales, $millions.
h02 <- PBS %>%
  filter(ATC2 == "H02") %>%
  summarise(Cost = sum(Cost)/1e6)
# Raw and log-transformed series side by side.
h02 %>%
  mutate(log(Cost)) %>%
  pivot_longer(-Month) %>%
  ggplot(aes(x = Month, y = value)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y") +
  labs(y="", title="Corticosteroid drug scripts (H02)")

# Seasonally differenced log series, with ACF/PACF.
h02 %>% gg_tsdisplay(difference(log(Cost), 12),
                     plot_type='partial', lag_max = 24)
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).

# ARIMA(3,0,1)(0,1,2)[12] on log sales, no constant (~ 0).
fit <- h02 %>%
  model(ARIMA(log(Cost) ~ 0 + pdq(3,0,1) + PDQ(0,1,2)))
fit %>% gg_tsresiduals(lag_max=36)

# dof = 6: 3 AR + 1 MA + 2 seasonal MA coefficients. The small
# p-value indicates some remaining residual autocorrelation.
augment(fit) %>%
  features(.innov, ljung_box, lag = 36, dof = 6)
## # A tibble: 1 × 3
##   .model                                             lb_stat lb_pvalue
##   <chr>                                                <dbl>     <dbl>
## 1 ARIMA(log(Cost) ~ 0 + pdq(3, 0, 1) + PDQ(0, 1, 2))    50.7    0.0104
# Forecasts back-transformed automatically from the log scale.
h02 %>%
  model(ARIMA(log(Cost) ~ 0 + pdq(3,0,1) + PDQ(0,1,2))) %>%
  forecast() %>%
  autoplot(h02) +
  labs(y=" $AU (millions)",
       title="Corticosteroid drug scripts (H02) sales")

# 9.10 ARIMA vs ETS

# Australian population in millions.
aus_economy <- global_economy %>%
  filter(Code == "AUS") %>%
  mutate(Population = Population/1e6)

# Time-series cross-validation: drop the final year (slice(-n())) so
# every one-step forecast can be scored against an observed value,
# then grow the training set from 10 observations (stretch_tsibble).
aus_economy %>%
  slice(-n()) %>%
  stretch_tsibble(.init = 10) %>%
  model(
    ETS(Population),
    ARIMA(Population)
  ) %>%
  forecast(h = 1) %>%
  accuracy(aus_economy) %>%
  select(.model, RMSE:MAPE)
## # A tibble: 2 × 5
##   .model              RMSE    MAE   MPE  MAPE
##   <chr>              <dbl>  <dbl> <dbl> <dbl>
## 1 ARIMA(Population) 0.194  0.0789 0.277 0.509
## 2 ETS(Population)   0.0774 0.0543 0.112 0.327
# ETS wins on CV accuracy, so use it for the final forecasts.
aus_economy %>%
  model(ETS(Population)) %>%
  forecast(h = "5 years") %>%
  autoplot(aus_economy %>% filter(Year >= 2000)) +
  labs(title = "Australian population",
       y = "People (millions)")

# Quarterly cement production from 1988; train up to 2007 Q4 and hold
# out the remainder as a test set.
cement <- aus_production %>%
  select(Cement) %>%
  filter_index("1988 Q1" ~ .)
train <- cement %>% filter_index(. ~ "2007 Q4")
fit_arima <- train %>% model(ARIMA(Cement))
report(fit_arima)
## Series: Cement 
## Model: ARIMA(1,0,1)(2,1,1)[4] w/ drift 
## 
## Coefficients:
##          ar1      ma1   sar1     sar2     sma1  constant
##       0.8886  -0.2366  0.081  -0.2345  -0.8979    5.3884
## s.e.  0.0842   0.1334  0.157   0.1392   0.1780    1.4844
## 
## sigma^2 estimated as 11456:  log likelihood=-463.52
## AIC=941.03   AICc=942.68   BIC=957.35
fit_arima %>% gg_tsresiduals(lag_max = 16)

# dof = 6: the six estimated coefficients listed in the report above.
augment(fit_arima) %>%
  features(.innov, ljung_box, lag = 16, dof = 6)
## # A tibble: 1 × 3
##   .model        lb_stat lb_pvalue
##   <chr>           <dbl>     <dbl>
## 1 ARIMA(Cement)    6.37     0.783
fit_ets <- train %>% model(ETS(Cement))
report(fit_ets)
## Series: Cement 
## Model: ETS(M,N,M) 
##   Smoothing parameters:
##     alpha = 0.7533714 
##     gamma = 0.0001000093 
## 
##   Initial states:
##      l[0]     s[0]    s[-1]    s[-2]     s[-3]
##  1694.712 1.031179 1.045209 1.011424 0.9121874
## 
##   sigma^2:  0.0034
## 
##      AIC     AICc      BIC 
## 1104.095 1105.650 1120.769
fit_ets %>%
  gg_tsresiduals(lag_max = 16)

# NOTE(review): dof = 6 mirrors the ARIMA test for comparability.
augment(fit_ets) %>%
  features(.innov, ljung_box, lag = 16, dof = 6)
## # A tibble: 1 × 3
##   .model      lb_stat lb_pvalue
##   <chr>         <dbl>     <dbl>
## 1 ETS(Cement)    10.0     0.438
# Generate forecasts and compare accuracy over the test set
bind_rows(
    fit_arima %>% accuracy(),
    fit_ets %>% accuracy(),
    fit_arima %>% forecast(h = 10) %>% accuracy(cement),
    fit_ets %>% forecast(h = 10) %>% accuracy(cement)
  ) %>%
  select(-ME, -MPE, -ACF1)
## # A tibble: 4 × 7
##   .model        .type     RMSE   MAE  MAPE  MASE RMSSE
##   <chr>         <chr>    <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(Cement) Training  100.  79.9  4.37 0.546 0.582
## 2 ETS(Cement)   Training  103.  80.0  4.41 0.547 0.596
## 3 ARIMA(Cement) Test      216. 186.   8.68 1.27  1.26 
## 4 ETS(Cement)   Test      222. 191.   8.85 1.30  1.29
# ARIMA is slightly better on the test set; refit on the full series
# and produce three years of forecasts.
cement %>%
  model(ARIMA(Cement)) %>%
  forecast(h="3 years") %>%
  autoplot(cement) +
  labs(title = "Cement production in Australia",
       y = "Tonnes ('000)")

# 10.2 Regression with ARIMA errors using fable

# Illustrative specification only: a regression on x with ARIMA(1,1,0)
# errors. Nothing is estimated here (it prints a model definition).
ARIMA(y ~ x + pdq(1,1,0))
## <ARIMA model definition>
# Quarterly % changes in US consumption and income, plotted together.
us_change %>%
  pivot_longer(c(Consumption, Income),
               names_to = "var", values_to = "value") %>%
  ggplot(aes(x = Quarter, y = value)) +
  geom_line() +
  facet_grid(vars(var), scales = "free_y") +
  labs(title = "US consumption and personal income",
       y = "Quarterly % change")

# Regress consumption on income; ARIMA error order is chosen
# automatically (here ARIMA(1,0,2)).
fit <- us_change %>%
  model(ARIMA(Consumption ~ Income))
report(fit)
## Series: Consumption 
## Model: LM w/ ARIMA(1,0,2) errors 
## 
## Coefficients:
##          ar1      ma1     ma2  Income  intercept
##       0.7070  -0.6172  0.2066  0.1976     0.5949
## s.e.  0.1068   0.1218  0.0741  0.0462     0.0850
## 
## sigma^2 estimated as 0.3113:  log likelihood=-163.04
## AIC=338.07   AICc=338.51   BIC=357.8
# Compare the regression residuals (eta_t) with the ARIMA
# innovation residuals (epsilon_t).
bind_rows(
    `Regression residuals` =
        as_tibble(residuals(fit, type = "regression")),
    `ARIMA residuals` =
        as_tibble(residuals(fit, type = "innovation")),
    .id = "type"
  ) %>%
  mutate(
    type = factor(type, levels=c(
      "Regression residuals", "ARIMA residuals"))
  ) %>%
  ggplot(aes(x = Quarter, y = .resid)) +
  geom_line() +
  facet_grid(vars(type))

fit %>% gg_tsresiduals()

# dof = 5: the five estimated coefficients listed in the report above.
augment(fit) %>%
  features(.innov, ljung_box, dof = 5, lag = 8)
## # A tibble: 1 × 3
##   .model                      lb_stat lb_pvalue
##   <chr>                         <dbl>     <dbl>
## 1 ARIMA(Consumption ~ Income)    5.21     0.157

# 10.3 Forecasting

# Forecast 8 quarters ahead, assuming future income equals its
# historical mean.
us_change_future <- new_data(us_change, 8) %>%
  mutate(Income = mean(us_change$Income))
forecast(fit, new_data = us_change_future) %>%
  autoplot(us_change) +
  labs(y = "Percentage change")

# Daily electricity demand for Victoria in 2014: daily total demand
# (GW), daily maximum temperature, and a working-day classification.
vic_elec_daily <- vic_elec %>%
  filter(year(Time) == 2014) %>%
  index_by(Date = date(Time)) %>%
  summarise(
    Demand = sum(Demand) / 1e3,
    Temperature = max(Temperature),
    Holiday = any(Holiday)
  ) %>%
  mutate(Day_Type = case_when(
    Holiday ~ "Holiday",
    wday(Date) %in% 2:6 ~ "Weekday",
    TRUE ~ "Weekend"
  ))

# Demand vs temperature is nonlinear (U-shaped), motivating the
# quadratic temperature term below.
vic_elec_daily %>%
  ggplot(aes(x = Temperature, y = Demand, colour = Day_Type)) +
  geom_point() +
  labs(y = "Electricity demand (GW)",
       x = "Maximum daily temperature")

vic_elec_daily %>%
  pivot_longer(c(Demand, Temperature)) %>%
  ggplot(aes(x = Date, y = value)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y") + ylab("")

# Dynamic regression: quadratic in temperature plus a weekday dummy,
# with ARIMA errors chosen automatically.
fit <- vic_elec_daily %>%
  model(ARIMA(Demand ~ Temperature + I(Temperature^2) +
                (Day_Type == "Weekday")))
fit %>% gg_tsresiduals()

# NOTE(review): dof = 9 is presumably the number of estimated model
# coefficients; the fitted order is not shown here -- confirm.
# The tiny p-value flags remaining residual autocorrelation.
augment(fit) %>%
  features(.innov, ljung_box, dof = 9, lag = 14)
## # A tibble: 1 × 3
##   .model                                                       lb_stat lb_pvalue
##   <chr>                                                          <dbl>     <dbl>
## 1 "ARIMA(Demand ~ Temperature + I(Temperature^2) + (Day_Type …    28.4 0.0000304
# Scenario forecasts: 14 days at 26 degrees, with day 1 a holiday
# (1 January).
vic_elec_future <- new_data(vic_elec_daily, 14) %>%
  mutate(
    Temperature = 26,
    Holiday = c(TRUE, rep(FALSE, 13)),
    Day_Type = case_when(
      Holiday ~ "Holiday",
      wday(Date) %in% 2:6 ~ "Weekday",
      TRUE ~ "Weekend"
    )
  )
forecast(fit, vic_elec_future) %>%
  autoplot(vic_elec_daily) +
  labs(title="Daily electricity demand: Victoria",
       y="GW")

# 10.4 Stochastic and deterministic trends

# Annual Australian air passengers.
aus_airpassengers %>%
  autoplot(Passengers) +
  labs(y = "Passengers (millions)",
       title = "Total annual air passengers")

# Deterministic trend: linear time trend with ARMA errors (d = 0).
fit_deterministic <- aus_airpassengers %>%
  model(deterministic = ARIMA(Passengers ~ 1 + trend() +
                                pdq(d = 0)))
report(fit_deterministic)
## Series: Passengers 
## Model: LM w/ ARIMA(1,0,0) errors 
## 
## Coefficients:
##          ar1  trend()  intercept
##       0.9564   1.4151     0.9014
## s.e.  0.0362   0.1972     7.0751
## 
## sigma^2 estimated as 4.343:  log likelihood=-100.88
## AIC=209.77   AICc=210.72   BIC=217.17
# Stochastic trend: differenced model with drift (d = 1).
fit_stochastic <- aus_airpassengers %>%
  model(stochastic = ARIMA(Passengers ~ pdq(d = 1)))
report(fit_stochastic)
## Series: Passengers 
## Model: ARIMA(0,1,0) w/ drift 
## 
## Coefficients:
##       constant
##         1.4191
## s.e.    0.3014
## 
## sigma^2 estimated as 4.271:  log likelihood=-98.16
## AIC=200.31   AICc=200.59   BIC=203.97
# Overlay both 20-year forecasts: similar point forecasts, but the
# stochastic-trend intervals are much wider.
aus_airpassengers %>%
  autoplot(Passengers) +
  autolayer(fit_stochastic %>% forecast(h = 20),
    colour = "#0072B2", level = 95) +
  autolayer(fit_deterministic %>% forecast(h = 20),
    colour = "#D55E00", alpha = 0.65, level = 95) +
  labs(y = "Air passengers (millions)",
       title = "Forecasts from trend models")

# 10.5 Dynamic harmonic regression

# Monthly eating-out turnover, 2004-2018.
aus_cafe <- aus_retail %>%
  filter(
    Industry == "Cafes, restaurants and takeaway food services",
    year(Month) %in% 2004:2018
  ) %>%
  summarise(Turnover = sum(Turnover))

# Dynamic harmonic regression: Fourier terms of order K = 1..6 with
# non-seasonal ARIMA errors (PDQ(0,0,0) disables seasonal ARIMA terms
# so seasonality is captured only by the Fourier terms).
fit <- model(aus_cafe,
  `K = 1` = ARIMA(log(Turnover) ~ fourier(K=1) + PDQ(0,0,0)),
  `K = 2` = ARIMA(log(Turnover) ~ fourier(K=2) + PDQ(0,0,0)),
  `K = 3` = ARIMA(log(Turnover) ~ fourier(K=3) + PDQ(0,0,0)),
  `K = 4` = ARIMA(log(Turnover) ~ fourier(K=4) + PDQ(0,0,0)),
  `K = 5` = ARIMA(log(Turnover) ~ fourier(K=5) + PDQ(0,0,0)),
  `K = 6` = ARIMA(log(Turnover) ~ fourier(K=6) + PDQ(0,0,0))
)

# One forecast panel per K, labelled with each model's AICc.
fit %>%
  forecast(h = "2 years") %>%
  autoplot(aus_cafe, level = 95) +
  facet_wrap(vars(.model), ncol = 2) +
  guides(colour = "none", fill = "none", level = "none") +
  geom_label(
    aes(x = yearmonth("2007 Jan"), y = 4250,
        label = paste0("AICc = ", format(AICc))),
    data = glance(fit)
  ) +
  labs(title= "Total monthly eating-out expenditure",
       y="$ billions")

# 10.6 Lagged predictors

# Insurance quotations and TV advertising expenditure.
insurance %>%
  pivot_longer(Quotes:TVadverts) %>%
  ggplot(aes(x = Month, y = value)) +
  geom_line() +
  facet_grid(vars(name), scales = "free_y") +
  labs(y = "", title = "Insurance advertising and quotations")

# Compare 0-3 lags of advertising. The first three Quotes values are
# set to NA so all four models are fitted to identical observations,
# making their AICc values comparable.
fit <- insurance %>%
  # Restrict data so models use same fitting period
  mutate(Quotes = c(NA, NA, NA, Quotes[4:40])) %>%
  # Estimate models
  model(
    lag0 = ARIMA(Quotes ~ pdq(d = 0) + TVadverts),
    lag1 = ARIMA(Quotes ~ pdq(d = 0) +
                 TVadverts + lag(TVadverts)),
    lag2 = ARIMA(Quotes ~ pdq(d = 0) +
                 TVadverts + lag(TVadverts) +
                 lag(TVadverts, 2)),
    lag3 = ARIMA(Quotes ~ pdq(d = 0) +
                 TVadverts + lag(TVadverts) +
                 lag(TVadverts, 2) + lag(TVadverts, 3))
  )
glance(fit)
## # A tibble: 4 × 8
##   .model sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 lag0    0.265   -28.3  66.6  68.3  75.0 <cpl [2]> <cpl [0]>
## 2 lag1    0.209   -24.0  58.1  59.9  66.5 <cpl [1]> <cpl [1]>
## 3 lag2    0.215   -24.0  60.0  62.6  70.2 <cpl [1]> <cpl [1]>
## 4 lag3    0.206   -22.2  60.3  65.0  73.8 <cpl [1]> <cpl [1]>
# lag1 has the lowest AICc; refit it on the full data.
fit_best <- insurance %>%
  model(ARIMA(Quotes ~ pdq(d = 0) +
              TVadverts + lag(TVadverts)))
report(fit_best)
## Series: Quotes 
## Model: LM w/ ARIMA(1,0,2) errors 
## 
## Coefficients:
##          ar1     ma1     ma2  TVadverts  lag(TVadverts)  intercept
##       0.5123  0.9169  0.4591     1.2527          0.1464     2.1554
## s.e.  0.1849  0.2051  0.1895     0.0588          0.0531     0.8595
## 
## sigma^2 estimated as 0.2166:  log likelihood=-23.94
## AIC=61.88   AICc=65.38   BIC=73.7
# Scenario: 20 future months with advertising fixed at 8 units.
insurance_future <- new_data(insurance, 20) %>%
  mutate(TVadverts = 8)
fit_best %>%
  forecast(insurance_future) %>%
  autoplot(insurance) +
  labs(
    y = "Quotes",
    title = "Forecast quotes with future advertising set to 8"
  )

# 10.7 Exercises

# Exercise setup: daily Victorian electricity data as in 10.3, with an
# extra hinge term Temp2 = max(Temperature - 20, 0), which is zero
# below 20 degrees and rises linearly above it.
vic_elec_daily <- vic_elec %>%
  filter(year(Time) == 2014) %>%
  index_by(Date = date(Time)) %>%
  summarise(
    Demand = sum(Demand)/1e3,
    Temperature = max(Temperature),
    Holiday = any(Holiday)) %>%
  mutate(
    Temp2 = I(pmax(Temperature-20,0)),
    Day_Type = case_when(
      Holiday ~ "Holiday",
      wday(Date) %in% 2:6 ~ "Weekday",
      TRUE ~ "Weekend"))

# 11.1 Hierarchical and grouped time series

# Abbreviate state names for compact display. Note this shadows the
# package dataset: `tourism` now refers to this modified copy.
tourism <- tsibble::tourism %>%
  mutate(State = recode(State,
    `New South Wales` = "NSW",
    `Northern Territory` = "NT",
    `Queensland` = "QLD",
    `South Australia` = "SA",
    `Tasmania` = "TAS",
    `Victoria` = "VIC",
    `Western Australia` = "WA"
  ))
# Nested hierarchy: total / State / Region, with aggregate rows added.
tourism_hts <- tourism %>%
  aggregate_key(State / Region, Trips = sum(Trips))
tourism_hts
## # A tsibble: 6,800 x 4 [1Q]
## # Key:       State, Region [85]
##    Quarter State        Region        Trips
##      <qtr> <chr*>       <chr*>        <dbl>
##  1 1998 Q1 <aggregated> <aggregated> 23182.
##  2 1998 Q2 <aggregated> <aggregated> 20323.
##  3 1998 Q3 <aggregated> <aggregated> 19827.
##  4 1998 Q4 <aggregated> <aggregated> 20830.
##  5 1999 Q1 <aggregated> <aggregated> 22087.
##  6 1999 Q2 <aggregated> <aggregated> 21458.
##  7 1999 Q3 <aggregated> <aggregated> 19914.
##  8 1999 Q4 <aggregated> <aggregated> 20028.
##  9 2000 Q1 <aggregated> <aggregated> 22339.
## 10 2000 Q2 <aggregated> <aggregated> 19941.
## # … with 6,790 more rows
# State-level (and national) series: keep rows where Region is the
# aggregate, one panel per State.
tourism_hts %>%
  filter(is_aggregated(Region)) %>%
  autoplot(Trips) +
  labs(y = "Trips ('000)",
       title = "Australian tourism: national and states") +
  facet_wrap(vars(State), scales = "free_y", ncol = 3) +
  theme(legend.position = "none")

# Quarterly Australian prison population, downloaded from the book
# site (requires network access when run).
prison <- readr::read_csv("https://OTexts.com/fpp3/extrafiles/prison_population.csv") %>%
  mutate(Quarter = yearquarter(Date)) %>%
  select(-Date)  %>%
  as_tsibble(key = c(Gender, Legal, State, Indigenous),
             index = Quarter) %>%
  relocate(Quarter)
## Rows: 3072 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): State, Gender, Legal, Indigenous
## dbl  (1): Count
## date (1): Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Grouped (crossed) structure: Gender x Legal x State, counts in '000.
prison_gts <- prison %>%
  aggregate_key(Gender * Legal * State, Count = sum(Count)/1e3)
# Prisoner numbers by gender (aggregated over Legal and State).
prison_gts %>%
  filter(!is_aggregated(Gender), is_aggregated(Legal),
         is_aggregated(State)) %>%
  autoplot(Count) +
  labs(y = "Number of prisoners ('000)")

# Bottom-level series summed to State x Gender panels.
prison_gts %>%
  filter(!is_aggregated(Gender), !is_aggregated(Legal),
         !is_aggregated(State)) %>%
  mutate(Gender = as.character(Gender)) %>%
  ggplot(aes(x = Quarter, y = Count,
             group = Gender, colour=Gender)) +
  stat_summary(fun = sum, geom = "line") +
  labs(title = "Prison population by state and gender",
       y = "Number of prisoners ('000)") +
  facet_wrap(~ as.character(State),
             nrow = 1, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Mixed structure: nested State/Region crossed with Purpose.
tourism_full <- tourism %>%
  aggregate_key((State/Region) * Purpose, Trips = sum(Trips))

# 11.2 Single level approaches

# Two-level hierarchy: national total / State.
tourism_states <- tourism %>%
  aggregate_key(State, Trips = sum(Trips,na.rm=TRUE))
# Bottom-up by hand: forecast each state separately...
fcasts_state <- tourism_states %>%
  filter(!is_aggregated(State)) %>%
  model(ets = ETS(Trips)) %>%
  forecast()

# Sum bottom-level forecasts to get top-level forecasts
fcasts_national <- fcasts_state %>%
  summarise(value = sum(Trips), .mean = mean(value))
# ...or use reconcile(bottom_up()), which does the same within the
# standard workflow (note the bu rows repeat the ets state forecasts).
tourism_states %>%
  model(ets = ETS(Trips)) %>%
  reconcile(bu = bottom_up(ets)) %>%
  forecast()
## # A fable: 144 x 5 [1Q]
## # Key:     State, .model [18]
##    State  .model Quarter         Trips .mean
##    <chr*> <chr>    <qtr>        <dist> <dbl>
##  1 ACT    ets    2018 Q1  N(701, 7651)  701.
##  2 ACT    ets    2018 Q2  N(717, 8032)  717.
##  3 ACT    ets    2018 Q3  N(734, 8440)  734.
##  4 ACT    ets    2018 Q4  N(750, 8882)  750.
##  5 ACT    ets    2019 Q1  N(767, 9368)  767.
##  6 ACT    ets    2019 Q2  N(784, 9905)  784.
##  7 ACT    ets    2019 Q3 N(800, 10503)  800.
##  8 ACT    ets    2019 Q4 N(817, 11171)  817.
##  9 ACT    bu     2018 Q1  N(701, 7651)  701.
## 10 ACT    bu     2018 Q2  N(717, 8032)  717.
## # … with 134 more rows
# The general reconciliation workflow:
# data %>% 
#   aggregate_key() %>% 
#   model() %>%
#   reconcile() %>% 
#   forecast()

# 11.4 Forecasting Australian domestic tourism

# Full structure: nested State/Region crossed with travel Purpose.
tourism_full <- tourism %>%
  aggregate_key((State/Region) * Purpose, Trips = sum(Trips))

# Fit base ETS models on the training period (up to 2015), then add
# three reconciliation methods: bottom-up, OLS and MinT (shrinkage).
# Note: removed a trailing comma after the last reconcile() argument,
# which created an empty trailing argument in the call.
fit <- tourism_full %>%
  filter(year(Quarter) <= 2015) %>%
  model(base = ETS(Trips)) %>%
  reconcile(
    bu = bottom_up(base),
    ols = min_trace(base, method = "ols"),
    mint = min_trace(base, method = "mint_shrink")
  )
# Two years of test-period forecasts for all series and all methods.
fc <- fit %>% forecast(h = "2 years")
# State-level comparison (Region and Purpose aggregated away).
fc %>%
  filter(is_aggregated(Region), is_aggregated(Purpose)) %>%
  autoplot(
    tourism_full %>% filter(year(Quarter) >= 2011),
    level = NULL
  ) +
  labs(y = "Trips ('000)") +
  facet_wrap(vars(State), scales = "free_y")

# Purpose-level comparison (State aggregated away).
fc %>%
  filter(is_aggregated(State), !is_aggregated(Purpose)) %>%
  autoplot(
    tourism_full %>% filter(year(Quarter) >= 2011),
    level = NULL
  ) +
  labs(y = "Trips ('000)") +
  facet_wrap(vars(Purpose), scales = "free_y")

# Test-set accuracy of the national total, averaged across horizons.
fc %>%
  filter(is_aggregated(State), is_aggregated(Purpose)) %>%
  accuracy(
    data = tourism_full,
    measures = list(rmse = RMSE, mase = MASE)
  ) %>%
  group_by(.model) %>%
  summarise(rmse = mean(rmse), mase = mean(mase))
## # A tibble: 4 × 3
##   .model  rmse  mase
##   <chr>  <dbl> <dbl>
## 1 base   1721.  1.53
## 2 bu     3070.  3.16
## 3 mint   2158.  2.09
## 4 ols    1804.  1.63

# 11.6 Forecasting Australian prison population

# Base ETS models on data to 2014, with bottom-up and MinT (shrinkage)
# reconciliation of the grouped prison series.
fit <- prison_gts %>%
  filter(year(Quarter) <= 2014) %>%
  model(base = ETS(Count)) %>%
  reconcile(
    bottom_up = bottom_up(base),
    MinT = min_trace(base, method = "mint_shrink")
  )
# Two years (8 quarters) of test-period forecasts.
fc <- fit %>% forecast(h = 8)
# Total prison population: all three methods with 90% intervals.
fc %>%
  filter(is_aggregated(State), is_aggregated(Gender),
         is_aggregated(Legal)) %>%
  autoplot(prison_gts, alpha = 0.7, level = 90) +
  labs(y = "Number of prisoners ('000)",
       title = "Australian prison population (total)")

# State-level comparison of base vs MinT only.
fc %>%
  filter(
    .model %in% c("base", "MinT"),
    !is_aggregated(State), is_aggregated(Legal),
    is_aggregated(Gender)
  ) %>%
  autoplot(
    prison_gts %>% filter(year(Quarter) >= 2010),
    alpha = 0.7, level = 90
  ) +
  labs(title = "Prison population (by state)",
       y = "Number of prisoners ('000)") +
  facet_wrap(vars(State), scales = "free_y", ncol = 4) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Accuracy of the national total: MASE for point forecasts and a
# CRPS skill score (percentage improvement) for the distributions.
fc %>%
  filter(is_aggregated(State), is_aggregated(Gender),
         is_aggregated(Legal)) %>%
  accuracy(data = prison_gts,
           measures = list(mase = MASE,
                           ss = skill_score(CRPS)
                           )
           ) %>%
  group_by(.model) %>%
  summarise(mase = mean(mase), sspc = mean(ss) * 100)
## # A tibble: 3 × 3
##   .model     mase  sspc
##   <chr>     <dbl> <dbl>
## 1 base      1.72   55.9
## 2 bottom_up 1.84   33.5
## 3 MinT      0.895  76.8